From 28e1b544e677dd6c6c6904cc32d5d744a7653bee Mon Sep 17 00:00:00 2001 From: DH Date: Mon, 7 Oct 2024 16:51:23 +0300 Subject: [PATCH] remove rpcsx-gpu-legacy --- CMakeLists.txt | 1 - hw/amdgpu/CMakeLists.txt | 11 - hw/amdgpu/device/CMakeLists.txt | 31 - .../device/include/amdgpu/device/device.hpp | 1322 ---- .../include/amdgpu/device/gpu-scheduler.hpp | 386 -- .../device/include/amdgpu/device/pm4.hpp | 101 - .../include/amdgpu/device/scheduler.hpp | 454 -- .../device/include/amdgpu/device/tiler.hpp | 572 -- hw/amdgpu/device/include/amdgpu/device/vk.hpp | 985 --- hw/amdgpu/device/src/device.cpp | 5115 -------------- hw/amdgpu/device/src/rect_list.geom.glsl | 40 - hw/amdgpu/include/amdgpu/RemoteMemory.hpp | 14 - hw/amdgpu/include/util/SourceLocation.hpp | 31 - hw/amdgpu/include/util/Verify.hpp | 24 - hw/amdgpu/include/util/VerifyVulkan.hpp | 14 - hw/amdgpu/include/util/area.hpp | 7 - hw/amdgpu/include/util/unreachable.hpp | 32 - hw/amdgpu/lib/libspirv/CMakeLists.txt | 4 - .../lib/libspirv/include/spirv/GLSL.std.450.h | 131 - .../libspirv/include/spirv/spirv-builder.hpp | 2248 ------ .../include/spirv/spirv-instruction.hpp | 2414 ------- .../lib/libspirv/include/spirv/spirv.hpp | 4968 ------------- hw/amdgpu/shader/CMakeLists.txt | 22 - .../shader/include/amdgpu/shader/AccessOp.hpp | 21 - .../include/amdgpu/shader/BufferKind.hpp | 5 - .../include/amdgpu/shader/CfBuilder.hpp | 8 - .../include/amdgpu/shader/Converter.hpp | 32 - .../amdgpu/shader/ConverterContext.hpp | 267 - .../shader/include/amdgpu/shader/Fragment.hpp | 85 - .../amdgpu/shader/FragmentTerminator.hpp | 11 - .../shader/include/amdgpu/shader/Function.hpp | 39 - .../include/amdgpu/shader/Instruction.hpp | 1973 ------ .../include/amdgpu/shader/RegisterId.hpp | 102 - .../include/amdgpu/shader/RegisterState.hpp | 27 - .../shader/include/amdgpu/shader/Stage.hpp | 5 - .../shader/include/amdgpu/shader/TypeId.hpp | 58 - .../shader/include/amdgpu/shader/Uniform.hpp | 20 - .../include/amdgpu/shader/UniformBindings.hpp | 72 - .../shader/include/amdgpu/shader/Value.hpp | 15 - hw/amdgpu/shader/include/amdgpu/shader/cf.hpp | 149 - .../shader/include/amdgpu/shader/scf.hpp | 344 - hw/amdgpu/shader/src/CfBuilder.cpp | 178 - hw/amdgpu/shader/src/Converter.cpp | 499 -- hw/amdgpu/shader/src/ConverterContext.cpp | 572 -- hw/amdgpu/shader/src/Fragment.cpp | 6116 ----------------- hw/amdgpu/shader/src/Function.cpp | 274 - hw/amdgpu/shader/src/Instruction.cpp | 3161 --------- hw/amdgpu/shader/src/RegisterState.cpp | 87 - hw/amdgpu/shader/src/TypeId.cpp | 134 - hw/amdgpu/shader/src/cf.cpp | 117 - hw/amdgpu/shader/src/scf.cpp | 249 - rpcsx-gpu-legacy/CMakeLists.txt | 12 - rpcsx-gpu-legacy/main.cpp | 1206 ---- 53 files changed, 34765 deletions(-) delete mode 100644 hw/amdgpu/device/CMakeLists.txt delete mode 100644 hw/amdgpu/device/include/amdgpu/device/device.hpp delete mode 100644 hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp delete mode 100644 hw/amdgpu/device/include/amdgpu/device/pm4.hpp delete mode 100644 hw/amdgpu/device/include/amdgpu/device/scheduler.hpp delete mode 100644 hw/amdgpu/device/include/amdgpu/device/tiler.hpp delete mode 100644 hw/amdgpu/device/include/amdgpu/device/vk.hpp delete mode 100644 hw/amdgpu/device/src/device.cpp delete mode 100644 hw/amdgpu/device/src/rect_list.geom.glsl delete mode 100644 hw/amdgpu/include/amdgpu/RemoteMemory.hpp delete mode 100644 hw/amdgpu/include/util/SourceLocation.hpp delete mode 100644 hw/amdgpu/include/util/Verify.hpp delete mode 100644 hw/amdgpu/include/util/VerifyVulkan.hpp delete mode 100644 hw/amdgpu/include/util/area.hpp delete mode 100644 hw/amdgpu/include/util/unreachable.hpp delete mode 100644 hw/amdgpu/lib/libspirv/CMakeLists.txt delete mode 100644 hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h delete mode 100644 hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp delete mode 100644 hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp delete mode 100644 hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp delete mode 100644 hw/amdgpu/shader/CMakeLists.txt delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Function.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/UniformBindings.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/Value.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/cf.hpp delete mode 100644 hw/amdgpu/shader/include/amdgpu/shader/scf.hpp delete mode 100644 hw/amdgpu/shader/src/CfBuilder.cpp delete mode 100644 hw/amdgpu/shader/src/Converter.cpp delete mode 100644 hw/amdgpu/shader/src/ConverterContext.cpp delete mode 100644 hw/amdgpu/shader/src/Fragment.cpp delete mode 100644 hw/amdgpu/shader/src/Function.cpp delete mode 100644 hw/amdgpu/shader/src/Instruction.cpp delete mode 100644 hw/amdgpu/shader/src/RegisterState.cpp delete mode 100644 hw/amdgpu/shader/src/TypeId.cpp delete mode 100644 hw/amdgpu/shader/src/cf.cpp delete mode 100644 hw/amdgpu/shader/src/scf.cpp delete mode 100644 rpcsx-gpu-legacy/CMakeLists.txt delete mode 100644 rpcsx-gpu-legacy/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 65d0777..1b3d339 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,6 @@ add_subdirectory(tools) add_subdirectory(orbis-kernel) add_subdirectory(rpcsx-os) -add_subdirectory(rpcsx-gpu-legacy) add_subdirectory(rpcsx-gpu) add_subdirectory(hw/amdgpu) add_subdirectory(rx) diff --git a/hw/amdgpu/CMakeLists.txt b/hw/amdgpu/CMakeLists.txt index 0e842f0..b4d9065 100644 --- a/hw/amdgpu/CMakeLists.txt +++ b/hw/amdgpu/CMakeLists.txt @@ -4,15 +4,4 @@ set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_EXTENSIONS off) add_subdirectory(bridge) -add_subdirectory(device) -add_subdirectory(shader) -add_subdirectory(lib/libspirv) - -project(amdgpu) - -add_library(${PROJECT_NAME} INTERFACE) -target_link_libraries(${PROJECT_NAME} INTERFACE rx) -target_include_directories(${PROJECT_NAME} INTERFACE include) - -add_library(amdgpu::base ALIAS ${PROJECT_NAME}) diff --git a/hw/amdgpu/device/CMakeLists.txt b/hw/amdgpu/device/CMakeLists.txt deleted file mode 100644 index 74e9b4f..0000000 --- a/hw/amdgpu/device/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -project(libamdgpu-device) -set(PROJECT_PATH amdgpu/device) - -set(SRC - src/device.cpp -) - -add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders - src/rect_list.geom.glsl -) - -add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) -target_link_libraries(${PROJECT_NAME} -PUBLIC - spirv - amdgpu::base - amdgpu::bridge - amdgpu::shader - util - SPIRV-Tools - SPIRV-Tools-opt - $<$:spirv-cross-glsl> - -PRIVATE - ${PROJECT_NAME}-shaders -) - -target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH}) -set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "") -add_library(amdgpu::device ALIAS ${PROJECT_NAME}) -set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/hw/amdgpu/device/include/amdgpu/device/device.hpp b/hw/amdgpu/device/include/amdgpu/device/device.hpp deleted file mode 100644 index 909ff2f..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/device.hpp +++ /dev/null @@ -1,1322 +0,0 @@ -#pragma once - -#include "amdgpu/RemoteMemory.hpp" -#include "amdgpu/bridge/bridge.hpp" -#include "amdgpu/shader/Instruction.hpp" -#include "gpu-scheduler.hpp" -#include "util/area.hpp" - -#include -#include -#include - -namespace amdgpu::device { -inline constexpr std::uint32_t getBits(std::uint32_t value, int end, - int begin) { - return (value >> begin) & ((1u << (end - begin + 1)) - 1); -} - -inline constexpr std::uint32_t getBit(std::uint32_t value, int bit) { - return (value >> bit) & 1; -} - -inline constexpr std::uint32_t genMask(std::uint32_t offset, - std::uint32_t bitCount) { - return ((1u << bitCount) - 1u) << offset; -} - -inline constexpr std::uint32_t getMaskEnd(std::uint32_t mask) { - return 32 - std::countl_zero(mask); -} - -inline constexpr std::uint32_t fetchMaskedValue(std::uint32_t hex, - std::uint32_t mask) { - return (hex & mask) >> std::countr_zero(mask); -} - -template >> -inline std::size_t calcStringLen(T value, unsigned base = 10) { - std::size_t n = 1; - std::size_t base2 = base * base; - std::size_t base3 = base2 * base; - std::size_t base4 = base3 * base; - - while (true) { - if (value < base) { - return n; - } - - if (value < base2) { - return n + 1; - } - - if (value < base3) { - return n + 2; - } - - if (value < base4) { - return n + 3; - } - - value /= base4; - n += 4; - } -} - -template >> -inline void toHexString(char *dst, std::size_t len, T value) { - while (len > 0) { - char digit = value % 16; - value /= 16; - - dst[--len] = digit < 10 ? '0' + digit : 'a' + digit - 10; - } -} - -inline std::string toHexString(unsigned value) { - auto len = calcStringLen(value, 16); - - std::string result(len, '\0'); - toHexString(result.data(), len, value); - return result; -} - -inline std::string toHexString(int value) { - bool isNeg = value < 0; - unsigned uval = isNeg ? static_cast(~value) + 1 : value; - auto len = calcStringLen(uval, 16); - - std::string result(len + (isNeg ? 1 : 0), '-'); - toHexString(result.data(), len, uval); - return result; -} - -enum Registers { - SPI_SHADER_PGM_LO_PS = 0x2c08, - SPI_SHADER_PGM_HI_PS = 0x2c09, - SPI_SHADER_PGM_RSRC1_PS = 0x2c0a, - SPI_SHADER_PGM_RSRC2_PS = 0x2c0b, - SPI_SHADER_USER_DATA_PS_0 = 0x2c0c, - SPI_SHADER_USER_DATA_PS_1, - SPI_SHADER_USER_DATA_PS_2, - SPI_SHADER_USER_DATA_PS_3, - SPI_SHADER_USER_DATA_PS_4, - SPI_SHADER_USER_DATA_PS_5, - SPI_SHADER_USER_DATA_PS_6, - SPI_SHADER_USER_DATA_PS_7, - SPI_SHADER_USER_DATA_PS_8, - SPI_SHADER_USER_DATA_PS_9, - SPI_SHADER_USER_DATA_PS_10, - SPI_SHADER_USER_DATA_PS_11, - SPI_SHADER_USER_DATA_PS_12, - SPI_SHADER_USER_DATA_PS_13, - SPI_SHADER_USER_DATA_PS_14, - SPI_SHADER_USER_DATA_PS_15, - - SPI_SHADER_PGM_LO_VS = 0x2c48, - SPI_SHADER_PGM_HI_VS = 0x2c49, - SPI_SHADER_PGM_RSRC1_VS = 0x2c4a, - SPI_SHADER_PGM_RSRC2_VS = 0x2c4b, - SPI_SHADER_USER_DATA_VS_0 = 0x2c4c, - SPI_SHADER_USER_DATA_VS_1 = 0x2c4d, - SPI_SHADER_USER_DATA_VS_2 = 0x2c4e, - SPI_SHADER_USER_DATA_VS_3 = 0x2c4f, - SPI_SHADER_USER_DATA_VS_4, - SPI_SHADER_USER_DATA_VS_5, - SPI_SHADER_USER_DATA_VS_6, - SPI_SHADER_USER_DATA_VS_7, - SPI_SHADER_USER_DATA_VS_8, - SPI_SHADER_USER_DATA_VS_9, - SPI_SHADER_USER_DATA_VS_10, - SPI_SHADER_USER_DATA_VS_11, - SPI_SHADER_USER_DATA_VS_12, - SPI_SHADER_USER_DATA_VS_13, - SPI_SHADER_USER_DATA_VS_14, - SPI_SHADER_USER_DATA_VS_15, - - COMPUTE_NUM_THREAD_X = 0x2e07, - COMPUTE_NUM_THREAD_Y, - COMPUTE_NUM_THREAD_Z, - COMPUTE_PGM_LO = 0x2e0c, - COMPUTE_PGM_HI, - COMPUTE_PGM_RSRC1 = 0x2e12, - COMPUTE_PGM_RSRC2, - COMPUTE_USER_DATA_0 = 0x2e40, - COMPUTE_USER_DATA_1, - COMPUTE_USER_DATA_2, - COMPUTE_USER_DATA_3, - COMPUTE_USER_DATA_4, - COMPUTE_USER_DATA_5, - COMPUTE_USER_DATA_6, - COMPUTE_USER_DATA_7, - COMPUTE_USER_DATA_8, - COMPUTE_USER_DATA_9, - COMPUTE_USER_DATA_10, - COMPUTE_USER_DATA_11, - COMPUTE_USER_DATA_12, - COMPUTE_USER_DATA_13, - COMPUTE_USER_DATA_14, - COMPUTE_USER_DATA_15, - - DB_RENDER_CONTROL = 0xa000, - DB_DEPTH_VIEW = 0xA002, - DB_HTILE_DATA_BASE = 0xA005, - DB_DEPTH_CLEAR = 0xA00B, - PA_SC_SCREEN_SCISSOR_TL = 0xa00c, - PA_SC_SCREEN_SCISSOR_BR = 0xa00d, - DB_DEPTH_INFO = 0xA00F, - DB_Z_INFO = 0xA010, - DB_STENCIL_INFO = 0xA011, - DB_Z_READ_BASE = 0xA012, - DB_STENCIL_READ_BASE = 0xA013, - DB_Z_WRITE_BASE = 0xA014, - DB_STENCIL_WRITE_BASE = 0xA015, - DB_DEPTH_SIZE = 0xA016, - DB_DEPTH_SLICE = 0xA017, - PA_SU_HARDWARE_SCREEN_OFFSET = 0xa08d, - CB_TARGET_MASK = 0xA08e, - CB_SHADER_MASK = 0xa08f, - PA_SC_VPORT_ZMIN_0 = 0xA0b4, - PA_SC_VPORT_ZMAX_0 = 0xA0b5, - PA_CL_VPORT_XSCALE = 0xa10f, - PA_CL_VPORT_XOFFSET, - PA_CL_VPORT_YSCALE, - PA_CL_VPORT_YOFFSET, - PA_CL_VPORT_ZSCALE, - PA_CL_VPORT_ZOFFSET, - SPI_PS_INPUT_CNTL_0 = 0xa191, - SPI_VS_OUT_CONFIG = 0xa1b1, - SPI_PS_INPUT_ENA = 0xa1b3, - SPI_PS_INPUT_ADDR = 0xa1b4, - SPI_PS_IN_CONTROL = 0xa1b6, - SPI_BARYC_CNTL = 0xa1b8, - SPI_SHADER_POS_FORMAT = 0xa1c3, - SPI_SHADER_Z_FORMAT = 0xa1c4, - SPI_SHADER_COL_FORMAT = 0xa1c5, - DB_DEPTH_CONTROL = 0xa200, - CB_COLOR_CONTROL = 0xa202, - DB_SHADER_CONTROL = 0xa203, - PA_CL_CLIP_CNTL = 0xa204, - PA_SU_SC_MODE_CNTL = 0xa205, - PA_CL_VTE_CNTL = 0xa206, - PA_CL_VS_OUT_CNTL = 0xa207, - DB_HTILE_SURFACE = 0xA2AF, - VGT_SHADER_STAGES_EN = 0xa2d5, - PA_CL_GB_VERT_CLIP_ADJ = 0xa2fa, - PA_CL_GB_VERT_DISC_ADJ, - PA_CL_GB_HORZ_CLIP_ADJ, - PA_CL_GB_HORZ_DISC_ADJ, - - CB_COLOR0_BASE = 0xA318, - CB_COLOR0_PITCH, - CB_COLOR0_SLICE, - CB_COLOR0_VIEW, - CB_COLOR0_INFO, - CB_COLOR0_ATTRIB, - CB_COLOR0_DCC_CONTROL, - CB_COLOR0_CMASK, - CB_COLOR0_CMASK_SLICE, - CB_COLOR0_FMASK, - CB_COLOR0_FMASK_SLICE, - CB_COLOR0_CLEAR_WORD0, - CB_COLOR0_CLEAR_WORD1, - CB_COLOR0_DCC_BASE, - CB_COLOR0_UNK0, - - CB_COLOR1_BASE, - CB_COLOR1_PITCH, - CB_COLOR1_SLICE, - CB_COLOR1_VIEW, - CB_COLOR1_INFO, - CB_COLOR1_ATTRIB, - CB_COLOR1_DCC_CONTROL, - CB_COLOR1_CMASK, - CB_COLOR1_CMASK_SLICE, - CB_COLOR1_FMASK, - CB_COLOR1_FMASK_SLICE, - CB_COLOR1_CLEAR_WORD0, - CB_COLOR1_CLEAR_WORD1, - CB_COLOR1_DCC_BASE, - CB_COLOR1_UNK0, - - CB_COLOR2_BASE, - CB_COLOR2_PITCH, - CB_COLOR2_SLICE, - CB_COLOR2_VIEW, - CB_COLOR2_INFO, - CB_COLOR2_ATTRIB, - CB_COLOR2_DCC_CONTROL, - CB_COLOR2_CMASK, - CB_COLOR2_CMASK_SLICE, - CB_COLOR2_FMASK, - CB_COLOR2_FMASK_SLICE, - CB_COLOR2_CLEAR_WORD0, - CB_COLOR2_CLEAR_WORD1, - CB_COLOR2_DCC_BASE, - CB_COLOR2_UNK0, - - CB_COLOR3_BASE, - CB_COLOR3_PITCH, - CB_COLOR3_SLICE, - CB_COLOR3_VIEW, - CB_COLOR3_INFO, - CB_COLOR3_ATTRIB, - CB_COLOR3_DCC_CONTROL, - CB_COLOR3_CMASK, - CB_COLOR3_CMASK_SLICE, - CB_COLOR3_FMASK, - CB_COLOR3_FMASK_SLICE, - CB_COLOR3_CLEAR_WORD0, - CB_COLOR3_CLEAR_WORD1, - CB_COLOR3_DCC_BASE, - CB_COLOR3_UNK0, - - CB_COLOR4_BASE, - CB_COLOR4_PITCH, - CB_COLOR4_SLICE, - CB_COLOR4_VIEW, - CB_COLOR4_INFO, - CB_COLOR4_ATTRIB, - CB_COLOR4_DCC_CONTROL, - CB_COLOR4_CMASK, - CB_COLOR4_CMASK_SLICE, - CB_COLOR4_FMASK, - CB_COLOR4_FMASK_SLICE, - CB_COLOR4_CLEAR_WORD0, - CB_COLOR4_CLEAR_WORD1, - CB_COLOR4_DCC_BASE, - CB_COLOR4_UNK0, - - CB_COLOR5_BASE, - CB_COLOR5_PITCH, - CB_COLOR5_SLICE, - CB_COLOR5_VIEW, - CB_COLOR5_INFO, - CB_COLOR5_ATTRIB, - CB_COLOR5_DCC_CONTROL, - CB_COLOR5_CMASK, - CB_COLOR5_CMASK_SLICE, - CB_COLOR5_FMASK, - CB_COLOR5_FMASK_SLICE, - CB_COLOR5_CLEAR_WORD0, - CB_COLOR5_CLEAR_WORD1, - CB_COLOR5_DCC_BASE, - CB_COLOR5_UNK0, - - CB_COLOR6_BASE, - CB_COLOR6_PITCH, - CB_COLOR6_SLICE, - CB_COLOR6_VIEW, - CB_COLOR6_INFO, - CB_COLOR6_ATTRIB, - CB_COLOR6_DCC_CONTROL, - CB_COLOR6_CMASK, - CB_COLOR6_CMASK_SLICE, - CB_COLOR6_FMASK, - CB_COLOR6_FMASK_SLICE, - CB_COLOR6_CLEAR_WORD0, - CB_COLOR6_CLEAR_WORD1, - CB_COLOR6_DCC_BASE, - CB_COLOR6_UNK0, - - CB_BLEND0_CONTROL = 0xa1e0, - - VGT_PRIMITIVE_TYPE = 0xc242, -}; - -inline std::string registerToString(int reg) { - switch (reg) { - case SPI_SHADER_PGM_LO_PS: - return "SPI_SHADER_PGM_LO_PS"; - case SPI_SHADER_PGM_HI_PS: - return "SPI_SHADER_PGM_HI_PS"; - case SPI_SHADER_PGM_RSRC1_PS: - return "SPI_SHADER_PGM_RSRC1_PS"; - case SPI_SHADER_PGM_RSRC2_PS: - return "SPI_SHADER_PGM_RSRC2_PS"; - case SPI_SHADER_USER_DATA_PS_0: - return "SPI_SHADER_USER_DATA_PS_0"; - case SPI_SHADER_USER_DATA_PS_1: - return "SPI_SHADER_USER_DATA_PS_1"; - case SPI_SHADER_USER_DATA_PS_2: - return "SPI_SHADER_USER_DATA_PS_2"; - case SPI_SHADER_USER_DATA_PS_3: - return "SPI_SHADER_USER_DATA_PS_3"; - case SPI_SHADER_USER_DATA_PS_4: - return "SPI_SHADER_USER_DATA_PS_4"; - case SPI_SHADER_USER_DATA_PS_5: - return "SPI_SHADER_USER_DATA_PS_5"; - case SPI_SHADER_USER_DATA_PS_6: - return "SPI_SHADER_USER_DATA_PS_6"; - case SPI_SHADER_USER_DATA_PS_7: - return "SPI_SHADER_USER_DATA_PS_7"; - case SPI_SHADER_USER_DATA_PS_8: - return "SPI_SHADER_USER_DATA_PS_8"; - case SPI_SHADER_USER_DATA_PS_9: - return "SPI_SHADER_USER_DATA_PS_9"; - case SPI_SHADER_USER_DATA_PS_10: - return "SPI_SHADER_USER_DATA_PS_10"; - case SPI_SHADER_USER_DATA_PS_11: - return "SPI_SHADER_USER_DATA_PS_11"; - case SPI_SHADER_USER_DATA_PS_12: - return "SPI_SHADER_USER_DATA_PS_12"; - case SPI_SHADER_USER_DATA_PS_13: - return "SPI_SHADER_USER_DATA_PS_13"; - case SPI_SHADER_USER_DATA_PS_14: - return "SPI_SHADER_USER_DATA_PS_14"; - case SPI_SHADER_USER_DATA_PS_15: - return "SPI_SHADER_USER_DATA_PS_15"; - case SPI_SHADER_PGM_LO_VS: - return "SPI_SHADER_PGM_LO_VS"; - case SPI_SHADER_PGM_HI_VS: - return "SPI_SHADER_PGM_HI_VS"; - case SPI_SHADER_PGM_RSRC1_VS: - return "SPI_SHADER_PGM_RSRC1_VS"; - case SPI_SHADER_PGM_RSRC2_VS: - return "SPI_SHADER_PGM_RSRC2_VS"; - case SPI_SHADER_USER_DATA_VS_0: - return "SPI_SHADER_USER_DATA_VS_0"; - case SPI_SHADER_USER_DATA_VS_1: - return "SPI_SHADER_USER_DATA_VS_1"; - case SPI_SHADER_USER_DATA_VS_2: - return "SPI_SHADER_USER_DATA_VS_2"; - case SPI_SHADER_USER_DATA_VS_3: - return "SPI_SHADER_USER_DATA_VS_3"; - case SPI_SHADER_USER_DATA_VS_4: - return "SPI_SHADER_USER_DATA_VS_4"; - case SPI_SHADER_USER_DATA_VS_5: - return "SPI_SHADER_USER_DATA_VS_5"; - case SPI_SHADER_USER_DATA_VS_6: - return "SPI_SHADER_USER_DATA_VS_6"; - case SPI_SHADER_USER_DATA_VS_7: - return "SPI_SHADER_USER_DATA_VS_7"; - case SPI_SHADER_USER_DATA_VS_8: - return "SPI_SHADER_USER_DATA_VS_8"; - case SPI_SHADER_USER_DATA_VS_9: - return "SPI_SHADER_USER_DATA_VS_9"; - case SPI_SHADER_USER_DATA_VS_10: - return "SPI_SHADER_USER_DATA_VS_10"; - case SPI_SHADER_USER_DATA_VS_11: - return "SPI_SHADER_USER_DATA_VS_11"; - case SPI_SHADER_USER_DATA_VS_12: - return "SPI_SHADER_USER_DATA_VS_12"; - case SPI_SHADER_USER_DATA_VS_13: - return "SPI_SHADER_USER_DATA_VS_13"; - case SPI_SHADER_USER_DATA_VS_14: - return "SPI_SHADER_USER_DATA_VS_14"; - case SPI_SHADER_USER_DATA_VS_15: - return "SPI_SHADER_USER_DATA_VS_15"; - case COMPUTE_NUM_THREAD_X: - return "COMPUTE_NUM_THREAD_X"; - case COMPUTE_NUM_THREAD_Y: - return "COMPUTE_NUM_THREAD_Y"; - case COMPUTE_NUM_THREAD_Z: - return "COMPUTE_NUM_THREAD_Z"; - case COMPUTE_PGM_LO: - return "COMPUTE_PGM_LO"; - case COMPUTE_PGM_HI: - return "COMPUTE_PGM_HI"; - case COMPUTE_PGM_RSRC1: - return "COMPUTE_PGM_RSRC1"; - case COMPUTE_PGM_RSRC2: - return "COMPUTE_PGM_RSRC2"; - case COMPUTE_USER_DATA_0: - return "COMPUTE_USER_DATA_0"; - case COMPUTE_USER_DATA_1: - return "COMPUTE_USER_DATA_1"; - case COMPUTE_USER_DATA_2: - return "COMPUTE_USER_DATA_2"; - case COMPUTE_USER_DATA_3: - return "COMPUTE_USER_DATA_3"; - case COMPUTE_USER_DATA_4: - return "COMPUTE_USER_DATA_4"; - case COMPUTE_USER_DATA_5: - return "COMPUTE_USER_DATA_5"; - case COMPUTE_USER_DATA_6: - return "COMPUTE_USER_DATA_6"; - case COMPUTE_USER_DATA_7: - return "COMPUTE_USER_DATA_7"; - case COMPUTE_USER_DATA_8: - return "COMPUTE_USER_DATA_8"; - case COMPUTE_USER_DATA_9: - return "COMPUTE_USER_DATA_9"; - case COMPUTE_USER_DATA_10: - return "COMPUTE_USER_DATA_10"; - case COMPUTE_USER_DATA_11: - return "COMPUTE_USER_DATA_11"; - case COMPUTE_USER_DATA_12: - return "COMPUTE_USER_DATA_12"; - case COMPUTE_USER_DATA_13: - return "COMPUTE_USER_DATA_13"; - case COMPUTE_USER_DATA_14: - return "COMPUTE_USER_DATA_14"; - case COMPUTE_USER_DATA_15: - return "COMPUTE_USER_DATA_15"; - case DB_DEPTH_CLEAR: - return "DB_DEPTH_CLEAR"; - case DB_RENDER_CONTROL: - return "DB_RENDER_CONTROL"; - case DB_DEPTH_VIEW: - return "DB_DEPTH_VIEW"; - case DB_HTILE_DATA_BASE: - return "DB_HTILE_DATA_BASE"; - case PA_SC_SCREEN_SCISSOR_TL: - return "PA_SC_SCREEN_SCISSOR_TL"; - case PA_SC_SCREEN_SCISSOR_BR: - return "PA_SC_SCREEN_SCISSOR_BR"; - case DB_DEPTH_INFO: - return "DB_DEPTH_INFO"; - case DB_Z_INFO: - return "DB_Z_INFO"; - case DB_STENCIL_INFO: - return "DB_STENCIL_INFO"; - case DB_Z_READ_BASE: - return "DB_Z_READ_BASE"; - case DB_STENCIL_READ_BASE: - return "DB_STENCIL_READ_BASE"; - case DB_Z_WRITE_BASE: - return "DB_Z_WRITE_BASE"; - case DB_STENCIL_WRITE_BASE: - return "DB_STENCIL_WRITE_BASE"; - case DB_DEPTH_SIZE: - return "DB_DEPTH_SIZE"; - case DB_DEPTH_SLICE: - return "DB_DEPTH_SLICE"; - case PA_SU_HARDWARE_SCREEN_OFFSET: - return "PA_SU_HARDWARE_SCREEN_OFFSET"; - case CB_TARGET_MASK: - return "CB_TARGET_MASK"; - case CB_SHADER_MASK: - return "CB_SHADER_MASK"; - case PA_SC_VPORT_ZMIN_0: - return "PA_SC_VPORT_ZMIN_0"; - case PA_SC_VPORT_ZMAX_0: - return "PA_SC_VPORT_ZMAX_0"; - case PA_CL_VPORT_XSCALE: - return "PA_CL_VPORT_XSCALE"; - case PA_CL_VPORT_XOFFSET: - return "PA_CL_VPORT_XOFFSET"; - case PA_CL_VPORT_YSCALE: - return "PA_CL_VPORT_YSCALE"; - case PA_CL_VPORT_YOFFSET: - return "PA_CL_VPORT_YOFFSET"; - case PA_CL_VPORT_ZSCALE: - return "PA_CL_VPORT_ZSCALE"; - case PA_CL_VPORT_ZOFFSET: - return "PA_CL_VPORT_ZOFFSET"; - case SPI_PS_INPUT_CNTL_0: - return "SPI_PS_INPUT_CNTL_0"; - case SPI_VS_OUT_CONFIG: - return "SPI_VS_OUT_CONFIG"; - case SPI_PS_INPUT_ENA: - return "SPI_PS_INPUT_ENA"; - case SPI_PS_INPUT_ADDR: - return "SPI_PS_INPUT_ADDR"; - case SPI_PS_IN_CONTROL: - return "SPI_PS_IN_CONTROL"; - case SPI_BARYC_CNTL: - return "SPI_BARYC_CNTL"; - case SPI_SHADER_POS_FORMAT: - return "SPI_SHADER_POS_FORMAT"; - case SPI_SHADER_Z_FORMAT: - return "SPI_SHADER_Z_FORMAT"; - case SPI_SHADER_COL_FORMAT: - return "SPI_SHADER_COL_FORMAT"; - case DB_DEPTH_CONTROL: - return "DB_DEPTH_CONTROL"; - case CB_COLOR_CONTROL: - return "DB_COLOR_CONTROL"; - case DB_SHADER_CONTROL: - return "DB_SHADER_CONTROL"; - case PA_CL_CLIP_CNTL: - return "PA_CL_CLIP_CNTL"; - case PA_SU_SC_MODE_CNTL: - return "PA_SU_SC_MODE_CNTL"; - case PA_CL_VTE_CNTL: - return "PA_CL_VTE_CNTL"; - case PA_CL_VS_OUT_CNTL: - return "PA_CL_VS_OUT_CNTL"; - case DB_HTILE_SURFACE: - return "DB_HTILE_SURFACE"; - case VGT_SHADER_STAGES_EN: - return "VGT_SHADER_STAGES_EN"; - case PA_CL_GB_VERT_CLIP_ADJ: - return "PA_CL_GB_VERT_CLIP_ADJ"; - case PA_CL_GB_VERT_DISC_ADJ: - return "PA_CL_GB_VERT_DISC_ADJ"; - case PA_CL_GB_HORZ_CLIP_ADJ: - return "PA_CL_GB_HORZ_CLIP_ADJ"; - case PA_CL_GB_HORZ_DISC_ADJ: - return "PA_CL_GB_HORZ_DISC_ADJ"; - case CB_COLOR0_BASE: - return "CB_COLOR0_BASE"; - case CB_COLOR0_PITCH: - return "CB_COLOR0_PITCH"; - case CB_COLOR0_SLICE: - return "CB_COLOR0_SLICE"; - case CB_COLOR0_VIEW: - return "CB_COLOR0_VIEW"; - case CB_COLOR0_INFO: - return "CB_COLOR0_INFO"; - case CB_COLOR0_ATTRIB: - return "CB_COLOR0_ATTRIB"; - case CB_COLOR0_DCC_CONTROL: - return "CB_COLOR0_DCC_CONTROL"; - case CB_COLOR0_CMASK: - return "CB_COLOR0_CMASK"; - case CB_COLOR0_CMASK_SLICE: - return "CB_COLOR0_CMASK_SLICE"; - case CB_COLOR0_FMASK: - return "CB_COLOR0_FMASK"; - case CB_COLOR0_FMASK_SLICE: - return "CB_COLOR0_FMASK_SLICE"; - case CB_COLOR0_CLEAR_WORD0: - return "CB_COLOR0_CLEAR_WORD0"; - case CB_COLOR0_CLEAR_WORD1: - return "CB_COLOR0_CLEAR_WORD1"; - case CB_COLOR0_DCC_BASE: - return "CB_COLOR0_DCC_BASE"; - case CB_COLOR1_BASE: - return "CB_COLOR1_BASE"; - case CB_COLOR1_PITCH: - return "CB_COLOR1_PITCH"; - case CB_COLOR1_SLICE: - return "CB_COLOR1_SLICE"; - case CB_COLOR1_VIEW: - return "CB_COLOR1_VIEW"; - case CB_COLOR1_INFO: - return "CB_COLOR1_INFO"; - case CB_COLOR1_ATTRIB: - return "CB_COLOR1_ATTRIB"; - case CB_COLOR1_DCC_CONTROL: - return "CB_COLOR1_DCC_CONTROL"; - case CB_COLOR1_CMASK: - return "CB_COLOR1_CMASK"; - case CB_COLOR1_CMASK_SLICE: - return "CB_COLOR1_CMASK_SLICE"; - case CB_COLOR1_FMASK: - return "CB_COLOR1_FMASK"; - case CB_COLOR1_FMASK_SLICE: - return "CB_COLOR1_FMASK_SLICE"; - case CB_COLOR1_CLEAR_WORD0: - return "CB_COLOR1_CLEAR_WORD0"; - case CB_COLOR1_CLEAR_WORD1: - return "CB_COLOR1_CLEAR_WORD1"; - case CB_COLOR1_DCC_BASE: - return "CB_COLOR1_DCC_BASE"; - case CB_COLOR2_BASE: - return "CB_COLOR2_BASE"; - case CB_COLOR2_PITCH: - return "CB_COLOR2_PITCH"; - case CB_COLOR2_SLICE: - return "CB_COLOR2_SLICE"; - case CB_COLOR2_VIEW: - return "CB_COLOR2_VIEW"; - case CB_COLOR2_INFO: - return "CB_COLOR2_INFO"; - case CB_COLOR2_ATTRIB: - return "CB_COLOR2_ATTRIB"; - case CB_COLOR2_DCC_CONTROL: - return "CB_COLOR2_DCC_CONTROL"; - case CB_COLOR2_CMASK: - return "CB_COLOR2_CMASK"; - case CB_COLOR2_CMASK_SLICE: - return "CB_COLOR2_CMASK_SLICE"; - case CB_COLOR2_FMASK: - return "CB_COLOR2_FMASK"; - case CB_COLOR2_FMASK_SLICE: - return "CB_COLOR2_FMASK_SLICE"; - case CB_COLOR2_CLEAR_WORD0: - return "CB_COLOR2_CLEAR_WORD0"; - case CB_COLOR2_CLEAR_WORD1: - return "CB_COLOR2_CLEAR_WORD1"; - case CB_COLOR2_DCC_BASE: - return "CB_COLOR2_DCC_BASE"; - case CB_COLOR3_BASE: - return "CB_COLOR3_BASE"; - case CB_COLOR3_PITCH: - return "CB_COLOR3_PITCH"; - case CB_COLOR3_SLICE: - return "CB_COLOR3_SLICE"; - case CB_COLOR3_VIEW: - return "CB_COLOR3_VIEW"; - case CB_COLOR3_INFO: - return "CB_COLOR3_INFO"; - case CB_COLOR3_ATTRIB: - return "CB_COLOR3_ATTRIB"; - case CB_COLOR3_DCC_CONTROL: - return "CB_COLOR3_DCC_CONTROL"; - case CB_COLOR3_CMASK: - return "CB_COLOR3_CMASK"; - case CB_COLOR3_CMASK_SLICE: - return "CB_COLOR3_CMASK_SLICE"; - case CB_COLOR3_FMASK: - return "CB_COLOR3_FMASK"; - case CB_COLOR3_FMASK_SLICE: - return "CB_COLOR3_FMASK_SLICE"; - case CB_COLOR3_CLEAR_WORD0: - return "CB_COLOR3_CLEAR_WORD0"; - case CB_COLOR3_CLEAR_WORD1: - return "CB_COLOR3_CLEAR_WORD1"; - case CB_COLOR3_DCC_BASE: - return "CB_COLOR3_DCC_BASE"; - case CB_COLOR4_BASE: - return "CB_COLOR4_BASE"; - case CB_COLOR4_PITCH: - return "CB_COLOR4_PITCH"; - case CB_COLOR4_SLICE: - return "CB_COLOR4_SLICE"; - case CB_COLOR4_VIEW: - return "CB_COLOR4_VIEW"; - case CB_COLOR4_INFO: - return "CB_COLOR4_INFO"; - case CB_COLOR4_ATTRIB: - return "CB_COLOR4_ATTRIB"; - case CB_COLOR4_DCC_CONTROL: - return "CB_COLOR4_DCC_CONTROL"; - case CB_COLOR4_CMASK: - return "CB_COLOR4_CMASK"; - case CB_COLOR4_CMASK_SLICE: - return "CB_COLOR4_CMASK_SLICE"; - case CB_COLOR4_FMASK: - return "CB_COLOR4_FMASK"; - case CB_COLOR4_FMASK_SLICE: - return "CB_COLOR4_FMASK_SLICE"; - case CB_COLOR4_CLEAR_WORD0: - return "CB_COLOR4_CLEAR_WORD0"; - case CB_COLOR4_CLEAR_WORD1: - return "CB_COLOR4_CLEAR_WORD1"; - case CB_COLOR4_DCC_BASE: - return "CB_COLOR4_DCC_BASE"; - case CB_COLOR5_BASE: - return "CB_COLOR5_BASE"; - case CB_COLOR5_PITCH: - return "CB_COLOR5_PITCH"; - case CB_COLOR5_SLICE: - return "CB_COLOR5_SLICE"; - case CB_COLOR5_VIEW: - return "CB_COLOR5_VIEW"; - case CB_COLOR5_INFO: - return "CB_COLOR5_INFO"; - case CB_COLOR5_ATTRIB: - return "CB_COLOR5_ATTRIB"; - case CB_COLOR5_DCC_CONTROL: - return "CB_COLOR5_DCC_CONTROL"; - case CB_COLOR5_CMASK: - return "CB_COLOR5_CMASK"; - case CB_COLOR5_CMASK_SLICE: - return "CB_COLOR5_CMASK_SLICE"; - case CB_COLOR5_FMASK: - return "CB_COLOR5_FMASK"; - case CB_COLOR5_FMASK_SLICE: - return "CB_COLOR5_FMASK_SLICE"; - case CB_COLOR5_CLEAR_WORD0: - return "CB_COLOR5_CLEAR_WORD0"; - case CB_COLOR5_CLEAR_WORD1: - return "CB_COLOR5_CLEAR_WORD1"; - case CB_COLOR5_DCC_BASE: - return "CB_COLOR5_DCC_BASE"; - case CB_COLOR6_BASE: - return "CB_COLOR6_BASE"; - case CB_COLOR6_PITCH: - return "CB_COLOR6_PITCH"; - case CB_COLOR6_SLICE: - return "CB_COLOR6_SLICE"; - case CB_COLOR6_VIEW: - return "CB_COLOR6_VIEW"; - case CB_COLOR6_INFO: - return "CB_COLOR6_INFO"; - case CB_COLOR6_ATTRIB: - return "CB_COLOR6_ATTRIB"; - case CB_COLOR6_DCC_CONTROL: - return "CB_COLOR6_DCC_CONTROL"; - case CB_COLOR6_CMASK: - return "CB_COLOR6_CMASK"; - case CB_COLOR6_CMASK_SLICE: - return "CB_COLOR6_CMASK_SLICE"; - case CB_COLOR6_FMASK: - return "CB_COLOR6_FMASK"; - case CB_COLOR6_FMASK_SLICE: - return "CB_COLOR6_FMASK_SLICE"; - case CB_COLOR6_CLEAR_WORD0: - return "CB_COLOR6_CLEAR_WORD0"; - case CB_COLOR6_CLEAR_WORD1: - return "CB_COLOR6_CLEAR_WORD1"; - case CB_COLOR6_DCC_BASE: - return "CB_COLOR6_DCC_BASE"; - case CB_BLEND0_CONTROL: - return "CB_BLEND0_CONTROL"; - - case VGT_PRIMITIVE_TYPE: - return "VGT_PRIMITIVE_TYPE"; - } - - return ""; -} - -enum Opcodes { - kOpcodeNOP = 0x10, - kOpcodeSET_BASE = 0x11, - kOpcodeCLEAR_STATE = 0x12, - kOpcodeINDEX_BUFFER_SIZE = 0x13, - kOpcodeDISPATCH_DIRECT = 0x15, - kOpcodeDISPATCH_INDIRECT = 0x16, - kOpcodeINDIRECT_BUFFER_END = 0x17, - kOpcodeMODE_CONTROL = 0x18, - kOpcodeATOMIC_GDS = 0x1D, - kOpcodeATOMIC_MEM = 0x1E, - kOpcodeOCCLUSION_QUERY = 0x1F, - kOpcodeSET_PREDICATION = 0x20, - kOpcodeREG_RMW = 0x21, - kOpcodeCOND_EXEC = 0x22, - kOpcodePRED_EXEC = 0x23, - kOpcodeDRAW_INDIRECT = 0x24, - kOpcodeDRAW_INDEX_INDIRECT = 0x25, - kOpcodeINDEX_BASE = 0x26, - kOpcodeDRAW_INDEX_2 = 0x27, - kOpcodeCONTEXT_CONTROL = 0x28, - kOpcodeDRAW_INDEX_OFFSET = 0x29, - kOpcodeINDEX_TYPE = 0x2A, - kOpcodeDRAW_INDEX = 0x2B, - kOpcodeDRAW_INDIRECT_MULTI = 0x2C, - kOpcodeDRAW_INDEX_AUTO = 0x2D, - kOpcodeDRAW_INDEX_IMMD = 0x2E, - kOpcodeNUM_INSTANCES = 0x2F, - kOpcodeDRAW_INDEX_MULTI_AUTO = 0x30, - kOpcodeINDIRECT_BUFFER_32 = 0x32, - kOpcodeINDIRECT_BUFFER_CONST = 0x33, - kOpcodeSTRMOUT_BUFFER_UPDATE = 0x34, - kOpcodeDRAW_INDEX_OFFSET_2 = 0x35, - kOpcodeDRAW_PREAMBLE = 0x36, - kOpcodeWRITE_DATA = 0x37, - kOpcodeDRAW_INDEX_INDIRECT_MULTI = 0x38, - kOpcodeMEM_SEMAPHORE = 0x39, - kOpcodeMPEG_INDEX = 0x3A, - kOpcodeCOPY_DW = 0x3B, - kOpcodeWAIT_REG_MEM = 0x3C, - kOpcodeMEM_WRITE = 0x3D, - kOpcodeINDIRECT_BUFFER_3F = 0x3F, - kOpcodeCOPY_DATA = 0x40, - kOpcodeCP_DMA = 0x41, - kOpcodePFP_SYNC_ME = 0x42, - kOpcodeSURFACE_SYNC = 0x43, - kOpcodeME_INITIALIZE = 0x44, - kOpcodeCOND_WRITE = 0x45, - kOpcodeEVENT_WRITE = 0x46, - kOpcodeEVENT_WRITE_EOP = 0x47, - kOpcodeEVENT_WRITE_EOS = 0x48, - kOpcodeRELEASE_MEM = 0x49, - kOpcodePREAMBLE_CNTL = 0x4A, - kOpcodeRB_OFFSET = 0x4B, - kOpcodeALU_PS_CONST_BUFFER_COPY = 0x4C, - kOpcodeALU_VS_CONST_BUFFER_COPY = 0x4D, - kOpcodeALU_PS_CONST_UPDATE = 0x4E, - kOpcodeALU_VS_CONST_UPDATE = 0x4F, - kOpcodeDMA_DATA = 0x50, - kOpcodeONE_REG_WRITE = 0x57, - kOpcodeACQUIRE_MEM = 0x58, - kOpcodeREWIND = 0x59, - kOpcodeLOAD_UCONFIG_REG = 0x5E, - kOpcodeLOAD_SH_REG = 0x5F, - kOpcodeLOAD_CONFIG_REG = 0x60, - kOpcodeLOAD_CONTEXT_REG = 0x61, - kOpcodeSET_CONFIG_REG = 0x68, - kOpcodeSET_CONTEXT_REG = 0x69, - kOpcodeSET_ALU_CONST = 0x6A, - kOpcodeSET_BOOL_CONST = 0x6B, - kOpcodeSET_LOOP_CONST = 0x6C, - kOpcodeSET_RESOURCE = 0x6D, - kOpcodeSET_SAMPLER = 0x6E, - kOpcodeSET_CTL_CONST = 0x6F, - kOpcodeSET_RESOURCE_OFFSET = 0x70, - kOpcodeSET_ALU_CONST_VS = 0x71, - kOpcodeSET_ALU_CONST_DI = 0x72, - kOpcodeSET_CONTEXT_REG_INDIRECT = 0x73, - kOpcodeSET_RESOURCE_INDIRECT = 0x74, - kOpcodeSET_APPEND_CNT = 0x75, - kOpcodeSET_SH_REG = 0x76, - kOpcodeSET_SH_REG_OFFSET = 0x77, - kOpcodeSET_QUEUE_REG = 0x78, - kOpcodeSET_UCONFIG_REG = 0x79, - kOpcodeSCRATCH_RAM_WRITE = 0x7D, - kOpcodeSCRATCH_RAM_READ = 0x7E, - kOpcodeLOAD_CONST_RAM = 0x80, - kOpcodeWRITE_CONST_RAM = 0x81, - kOpcodeDUMP_CONST_RAM = 0x83, - kOpcodeINCREMENT_CE_COUNTER = 0x84, - kOpcodeINCREMENT_DE_COUNTER = 0x85, - kOpcodeWAIT_ON_CE_COUNTER = 0x86, - kOpcodeWAIT_ON_DE_COUNTER_DIFF = 0x88, - kOpcodeSWITCH_BUFFER = 0x8B, -}; - -inline const std::string opcodeToString(int op) { - switch (op) { - case kOpcodeNOP: - return "IT_NOP"; - case kOpcodeSET_BASE: - return "IT_SET_BASE"; - case kOpcodeCLEAR_STATE: - return "IT_CLEAR_STATE"; - case kOpcodeINDEX_BUFFER_SIZE: - return "IT_INDEX_BUFFER_SIZE"; - case kOpcodeDISPATCH_DIRECT: - return "IT_DISPATCH_DIRECT"; - case kOpcodeDISPATCH_INDIRECT: - return "IT_DISPATCH_INDIRECT"; - case kOpcodeINDIRECT_BUFFER_END: - return "IT_INDIRECT_BUFFER_END"; - case kOpcodeATOMIC_GDS: - return "IT_ATOMIC_GDS"; - case kOpcodeATOMIC_MEM: - return "IT_ATOMIC_MEM"; - case kOpcodeOCCLUSION_QUERY: - return "IT_OCCLUSION_QUERY"; - case kOpcodeSET_PREDICATION: - return "IT_SET_PREDICATION"; - case kOpcodeREG_RMW: - return "IT_REG_RMW"; - case kOpcodeCOND_EXEC: - return "IT_COND_EXEC"; - case kOpcodePRED_EXEC: - return "IT_PRED_EXEC"; - case kOpcodeDRAW_INDIRECT: - return "IT_DRAW_INDIRECT"; - case kOpcodeDRAW_INDEX_INDIRECT: - return "IT_DRAW_INDEX_INDIRECT"; - case kOpcodeINDEX_BASE: - return "IT_INDEX_BASE"; - case kOpcodeDRAW_INDEX_2: - return "IT_DRAW_INDEX_2"; - case kOpcodeCONTEXT_CONTROL: - return "IT_CONTEXT_CONTROL"; - case kOpcodeINDEX_TYPE: - return "IT_INDEX_TYPE"; - case kOpcodeDRAW_INDEX: - return "IT_DRAW_INDEX"; - case kOpcodeDRAW_INDIRECT_MULTI: - return "IT_DRAW_INDIRECT_MULTI"; - case kOpcodeDRAW_INDEX_AUTO: - return "IT_DRAW_INDEX_AUTO"; - case kOpcodeDRAW_INDEX_IMMD: - return "IT_DRAW_INDEX_IMMD"; - case kOpcodeNUM_INSTANCES: - return "IT_NUM_INSTANCES"; - case kOpcodeDRAW_INDEX_MULTI_AUTO: - return "IT_DRAW_INDEX_MULTI_AUTO"; - case kOpcodeINDIRECT_BUFFER_32: - return "IT_INDIRECT_BUFFER_32"; - case kOpcodeINDIRECT_BUFFER_CONST: - return "IT_INDIRECT_BUFFER_CONST"; - case kOpcodeSTRMOUT_BUFFER_UPDATE: - return "IT_STRMOUT_BUFFER_UPDATE"; - case kOpcodeDRAW_INDEX_OFFSET_2: - return "IT_DRAW_INDEX_OFFSET_2"; - case kOpcodeDRAW_PREAMBLE: - return "IT_DRAW_PREAMBLE"; - case kOpcodeWRITE_DATA: - return "IT_WRITE_DATA"; - case kOpcodeDRAW_INDEX_INDIRECT_MULTI: - return "IT_DRAW_INDEX_INDIRECT_MULTI"; - case kOpcodeMEM_SEMAPHORE: - return "IT_MEM_SEMAPHORE"; - case kOpcodeMPEG_INDEX: - return "IT_MPEG_INDEX"; - case kOpcodeCOPY_DW: - return "IT_COPY_DW"; - case kOpcodeWAIT_REG_MEM: - return "IT_WAIT_REG_MEM"; - case kOpcodeMEM_WRITE: - return "IT_MEM_WRITE"; - case kOpcodeINDIRECT_BUFFER_3F: - return "IT_INDIRECT_BUFFER_3F"; - case kOpcodeCOPY_DATA: - return "IT_COPY_DATA"; - case kOpcodeCP_DMA: - return "IT_CP_DMA"; - case kOpcodePFP_SYNC_ME: - return "IT_PFP_SYNC_ME"; - case kOpcodeSURFACE_SYNC: - return "IT_SURFACE_SYNC"; - case kOpcodeME_INITIALIZE: - return "IT_ME_INITIALIZE"; - case kOpcodeCOND_WRITE: - return "IT_COND_WRITE"; - case kOpcodeEVENT_WRITE: - return "IT_EVENT_WRITE"; - case kOpcodeEVENT_WRITE_EOP: - return "IT_EVENT_WRITE_EOP"; - case kOpcodeEVENT_WRITE_EOS: - return "IT_EVENT_WRITE_EOS"; - case kOpcodeRELEASE_MEM: - return "IT_RELEASE_MEM"; - case kOpcodePREAMBLE_CNTL: - return "IT_PREAMBLE_CNTL"; - case kOpcodeDMA_DATA: - return "IT_DMA_DATA"; - case kOpcodeONE_REG_WRITE: - return "IT_ONE_REG_WRITE"; - case kOpcodeACQUIRE_MEM: - return "IT_ACQUIRE_MEM"; - case kOpcodeREWIND: - return "IT_REWIND"; - case kOpcodeLOAD_UCONFIG_REG: - return "IT_LOAD_UCONFIG_REG"; - case kOpcodeLOAD_SH_REG: - return "IT_LOAD_SH_REG"; - case kOpcodeLOAD_CONFIG_REG: - return "IT_LOAD_CONFIG_REG"; - case kOpcodeLOAD_CONTEXT_REG: - return "IT_LOAD_CONTEXT_REG"; - case kOpcodeSET_CONFIG_REG: - return "IT_SET_CONFIG_REG"; - case kOpcodeSET_CONTEXT_REG: - return "IT_SET_CONTEXT_REG"; - case kOpcodeSET_ALU_CONST: - return "IT_SET_ALU_CONST"; - case kOpcodeSET_BOOL_CONST: - return "IT_SET_BOOL_CONST"; - case kOpcodeSET_LOOP_CONST: - return "IT_SET_LOOP_CONST"; - case kOpcodeSET_RESOURCE: - return "IT_SET_RESOURCE"; - case kOpcodeSET_SAMPLER: - return "IT_SET_SAMPLER"; - case kOpcodeSET_CTL_CONST: - return "IT_SET_CTL_CONST"; - case kOpcodeSET_CONTEXT_REG_INDIRECT: - return "IT_SET_CONTEXT_REG_INDIRECT"; - case kOpcodeSET_SH_REG: - return "IT_SET_SH_REG"; - case kOpcodeSET_SH_REG_OFFSET: - return "IT_SET_SH_REG_OFFSET"; - case kOpcodeSET_QUEUE_REG: - return "IT_SET_QUEUE_REG"; - case kOpcodeSET_UCONFIG_REG: - return "IT_SET_UCONFIG_REG"; - case kOpcodeSCRATCH_RAM_WRITE: - return "IT_SCRATCH_RAM_WRITE"; - case kOpcodeSCRATCH_RAM_READ: - return "IT_SCRATCH_RAM_READ"; - case kOpcodeLOAD_CONST_RAM: - return "IT_LOAD_CONST_RAM"; - case kOpcodeWRITE_CONST_RAM: - return "IT_WRITE_CONST_RAM"; - case kOpcodeDUMP_CONST_RAM: - return "IT_DUMP_CONST_RAM"; - case kOpcodeINCREMENT_CE_COUNTER: - return "IT_INCREMENT_CE_COUNTER"; - case kOpcodeINCREMENT_DE_COUNTER: - return "IT_INCREMENT_DE_COUNTER"; - case kOpcodeWAIT_ON_CE_COUNTER: - return "IT_WAIT_ON_CE_COUNTER"; - case kOpcodeWAIT_ON_DE_COUNTER_DIFF: - return "IT_WAIT_ON_DE_COUNTER_DIFF"; - case kOpcodeSWITCH_BUFFER: - return "IT_SWITCH_BUFFER"; - } - - return ""; -} - -inline void dumpShader(const std::uint32_t *data) { - flockfile(stdout); - while (true) { - auto instHex = *data; - bool isEnd = instHex == 0xBF810000 || instHex == 0xBE802000; - - shader::Instruction inst(data); - - for (int i = 0; i < inst.size(); ++i) { - std::printf("%08X ", data[i]); - } - - inst.dump(); - printf("\n"); - data += inst.size(); - - if (isEnd) { - break; - } - } - funlockfile(stdout); -} - -enum BlendMultiplier { - kBlendMultiplierZero = 0x00000000, - kBlendMultiplierOne = 0x00000001, - kBlendMultiplierSrcColor = 0x00000002, - kBlendMultiplierOneMinusSrcColor = 0x00000003, - kBlendMultiplierSrcAlpha = 0x00000004, - kBlendMultiplierOneMinusSrcAlpha = 0x00000005, - kBlendMultiplierDestAlpha = 0x00000006, - kBlendMultiplierOneMinusDestAlpha = 0x00000007, - kBlendMultiplierDestColor = 0x00000008, - kBlendMultiplierOneMinusDestColor = 0x00000009, - kBlendMultiplierSrcAlphaSaturate = 0x0000000a, - kBlendMultiplierConstantColor = 0x0000000d, - kBlendMultiplierOneMinusConstantColor = 0x0000000e, - kBlendMultiplierSrc1Color = 0x0000000f, - kBlendMultiplierInverseSrc1Color = 0x00000010, - kBlendMultiplierSrc1Alpha = 0x00000011, - kBlendMultiplierInverseSrc1Alpha = 0x00000012, - kBlendMultiplierConstantAlpha = 0x00000013, - kBlendMultiplierOneMinusConstantAlpha = 0x00000014, -}; - -enum BlendFunc { - kBlendFuncAdd = 0x00000000, - kBlendFuncSubtract = 0x00000001, - kBlendFuncMin = 0x00000002, - kBlendFuncMax = 0x00000003, - kBlendFuncReverseSubtract = 0x00000004, -}; - -enum PrimitiveType : unsigned { - kPrimitiveTypeNone = 0x00000000, - kPrimitiveTypePointList = 0x00000001, - kPrimitiveTypeLineList = 0x00000002, - kPrimitiveTypeLineStrip = 0x00000003, - kPrimitiveTypeTriList = 0x00000004, - kPrimitiveTypeTriFan = 0x00000005, - kPrimitiveTypeTriStrip = 0x00000006, - kPrimitiveTypePatch = 0x00000009, - kPrimitiveTypeLineListAdjacency = 0x0000000a, - kPrimitiveTypeLineStripAdjacency = 0x0000000b, - kPrimitiveTypeTriListAdjacency = 0x0000000c, - kPrimitiveTypeTriStripAdjacency = 0x0000000d, - kPrimitiveTypeRectList = 0x00000011, - kPrimitiveTypeLineLoop = 0x00000012, - kPrimitiveTypeQuadList = 0x00000013, - kPrimitiveTypeQuadStrip = 0x00000014, - kPrimitiveTypePolygon = 0x00000015 -}; - -enum SurfaceFormat : unsigned { - kSurfaceFormatInvalid = 0x00000000, - kSurfaceFormat8 = 0x00000001, - kSurfaceFormat16 = 0x00000002, - kSurfaceFormat8_8 = 0x00000003, - kSurfaceFormat32 = 0x00000004, - kSurfaceFormat16_16 = 0x00000005, - kSurfaceFormat10_11_11 = 0x00000006, - kSurfaceFormat11_11_10 = 0x00000007, - kSurfaceFormat10_10_10_2 = 0x00000008, - kSurfaceFormat2_10_10_10 = 0x00000009, - kSurfaceFormat8_8_8_8 = 0x0000000a, - kSurfaceFormat32_32 = 0x0000000b, - kSurfaceFormat16_16_16_16 = 0x0000000c, - kSurfaceFormat32_32_32 = 0x0000000d, - kSurfaceFormat32_32_32_32 = 0x0000000e, - kSurfaceFormat5_6_5 = 0x00000010, - kSurfaceFormat1_5_5_5 = 0x00000011, - kSurfaceFormat5_5_5_1 = 0x00000012, - kSurfaceFormat4_4_4_4 = 0x00000013, - kSurfaceFormat8_24 = 0x00000014, - kSurfaceFormat24_8 = 0x00000015, - kSurfaceFormatX24_8_32 = 0x00000016, - kSurfaceFormatGB_GR = 0x00000020, - kSurfaceFormatBG_RG = 0x00000021, - kSurfaceFormat5_9_9_9 = 0x00000022, - kSurfaceFormatBc1 = 0x00000023, - kSurfaceFormatBc2 = 0x00000024, - kSurfaceFormatBc3 = 0x00000025, - kSurfaceFormatBc4 = 0x00000026, - kSurfaceFormatBc5 = 0x00000027, - kSurfaceFormatBc6 = 0x00000028, - kSurfaceFormatBc7 = 0x00000029, - kSurfaceFormatFmask8_S2_F1 = 0x0000002C, - kSurfaceFormatFmask8_S4_F1 = 0x0000002D, - kSurfaceFormatFmask8_S8_F1 = 0x0000002E, - kSurfaceFormatFmask8_S2_F2 = 0x0000002F, - kSurfaceFormatFmask8_S4_F2 = 0x00000030, - kSurfaceFormatFmask8_S4_F4 = 0x00000031, - kSurfaceFormatFmask16_S16_F1 = 0x00000032, - kSurfaceFormatFmask16_S8_F2 = 0x00000033, - kSurfaceFormatFmask32_S16_F2 = 0x00000034, - kSurfaceFormatFmask32_S8_F4 = 0x00000035, - kSurfaceFormatFmask32_S8_F8 = 0x00000036, - kSurfaceFormatFmask64_S16_F4 = 0x00000037, - kSurfaceFormatFmask64_S16_F8 = 0x00000038, - kSurfaceFormat4_4 = 0x00000039, - kSurfaceFormat6_5_5 = 0x0000003A, - kSurfaceFormat1 = 0x0000003B, - kSurfaceFormat1Reversed = 0x0000003C, -}; - -enum TextureChannelType : unsigned { - kTextureChannelTypeUNorm = 0x00000000, - kTextureChannelTypeSNorm = 0x00000001, - kTextureChannelTypeUScaled = 0x00000002, - kTextureChannelTypeSScaled = 0x00000003, - kTextureChannelTypeUInt = 0x00000004, - kTextureChannelTypeSInt = 0x00000005, - kTextureChannelTypeSNormNoZero = 0x00000006, - kTextureChannelTypeFloat = 0x00000007, - kTextureChannelTypeSrgb = 0x00000009, - kTextureChannelTypeUBNorm = 0x0000000A, - kTextureChannelTypeUBNormNoZero = 0x0000000B, - kTextureChannelTypeUBInt = 0x0000000C, - kTextureChannelTypeUBScaled = 0x0000000D, -}; - -struct GnmVBuffer { - uint64_t base : 44; - uint64_t mtype_L1s : 2; - uint64_t mtype_L2 : 2; - uint64_t stride : 14; - uint64_t cache_swizzle : 1; - uint64_t swizzle_en : 1; - - uint32_t num_records; - - uint32_t dst_sel_x : 3; - uint32_t dst_sel_y : 3; - uint32_t dst_sel_z : 3; - uint32_t dst_sel_w : 3; - - uint32_t nfmt : 3; - uint32_t dfmt : 4; - uint32_t element_size : 2; - uint32_t index_stride : 2; - uint32_t addtid_en : 1; - uint32_t reserved0 : 1; - uint32_t hash_en : 1; - uint32_t reserved1 : 1; - uint32_t mtype : 3; - uint32_t type : 2; - - std::uint64_t getAddress() const { return base; } - - uint32_t getStride() const { return stride; } - - uint32_t getSize() const { - uint32_t stride = getStride(); - uint32_t numElements = getNumRecords(); - return stride ? numElements * stride : numElements; - } - - uint32_t getNumRecords() const { return num_records; } - uint32_t getElementSize() const { return element_size; } - uint32_t getIndexStrideSize() const { return index_stride; } - SurfaceFormat getSurfaceFormat() const { return (SurfaceFormat)dfmt; } - TextureChannelType getChannelType() const { return (TextureChannelType)nfmt; } -}; - -static_assert(sizeof(GnmVBuffer) == sizeof(std::uint64_t) * 2); - -enum class TextureType : uint64_t { - Dim1D = 8, - Dim2D, - Dim3D, - Cube, - Array1D, - Array2D, - Msaa2D, - MsaaArray2D, -}; - -struct GnmTBuffer { - uint64_t baseaddr256 : 38; - uint64_t mtype_L2 : 2; - uint64_t min_lod : 12; - SurfaceFormat dfmt : 6; - TextureChannelType nfmt : 4; - uint64_t mtype01 : 2; - - uint64_t width : 14; - uint64_t height : 14; - uint64_t perfMod : 3; - uint64_t interlaced : 1; - uint64_t dst_sel_x : 3; - uint64_t dst_sel_y : 3; - uint64_t dst_sel_z : 3; - uint64_t dst_sel_w : 3; - uint64_t base_level : 4; - uint64_t last_level : 4; - uint64_t tiling_idx : 5; - uint64_t pow2pad : 1; - uint64_t mtype2 : 1; - uint64_t : 1; // reserved - TextureType type : 4; - - uint64_t depth : 13; - uint64_t pitch : 14; - uint64_t : 5; // reserved - uint64_t base_array : 13; - uint64_t last_array : 13; - uint64_t : 6; // reserved - - uint64_t min_lod_warn : 12; // fixed point 4.8 - uint64_t counter_bank_id : 8; - uint64_t LOD_hdw_cnt_en : 1; - uint64_t : 42; // reserved - - std::uint64_t getAddress() const { - return static_cast(static_cast(baseaddr256)) - << 8; - } -}; - -static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4); - -struct GnmSSampler { - int32_t clamp_x : 3; - int32_t clamp_y : 3; - int32_t clamp_z : 3; - int32_t max_aniso_ratio : 3; - int32_t depth_compare_func : 3; - int32_t force_unorm_coords : 1; - int32_t aniso_threshold : 3; - int32_t mc_coord_trunc : 1; - int32_t force_degamma : 1; - int32_t aniso_bias : 6; - int32_t trunc_coord : 1; - int32_t disable_cube_wrap : 1; - int32_t filter_mode : 2; - int32_t : 1; - int32_t min_lod : 12; - int32_t max_lod : 12; - int32_t perf_mip : 4; - int32_t perf_z : 4; - int32_t lod_bias : 14; - int32_t lod_bias_sec : 6; - int32_t xy_mag_filter : 2; - int32_t xy_min_filter : 2; - int32_t z_filter : 2; - int32_t mip_filter : 2; - int32_t : 4; - int32_t border_color_ptr : 12; - int32_t : 18; - int32_t border_color_type : 2; - - auto operator<=>(const GnmSSampler &) const = default; - bool operator==(const GnmSSampler &) const = default; -}; - -static_assert(sizeof(GnmSSampler) == sizeof(std::uint32_t) * 4); - -constexpr auto kPageSize = 0x4000; - -void setVkDevice(VkDevice device, - VkPhysicalDeviceMemoryProperties memProperties, - VkPhysicalDeviceProperties devProperties); - -struct AmdgpuDevice { - void handleProtectMemory(RemoteMemory memory, std::uint64_t address, - std::uint64_t size, std::uint32_t prot); - void handleCommandBuffer(RemoteMemory memory, std::uint64_t queueId, - std::uint64_t address, std::uint64_t size); - bool handleFlip(RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer, - TaskChain &initTaskChain, std::uint32_t bufferIndex, - std::uint64_t arg, VkImage targetImage, - VkExtent2D targetExtent, VkSemaphore waitSemaphore, - VkSemaphore signalSemaphore, VkFence fence, - bridge::CmdBuffer *buffers, - bridge::CmdBufferAttribute *bufferAttributes); - - AmdgpuDevice(amdgpu::bridge::BridgeHeader *bridge); - - ~AmdgpuDevice(); -}; -} // namespace amdgpu::device diff --git a/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp b/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp deleted file mode 100644 index efe3eb3..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/gpu-scheduler.hpp +++ /dev/null @@ -1,386 +0,0 @@ -#pragma once - -#include "scheduler.hpp" -#include "vk.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace amdgpu::device { -enum class ProcessQueue { - Graphics = 1 << 1, - Compute = 1 << 2, - Transfer = 1 << 3, - Any = Graphics | Compute | Transfer -}; - -inline ProcessQueue operator|(ProcessQueue lhs, ProcessQueue rhs) { - return static_cast(std::to_underlying(lhs) | - std::to_underlying(rhs)); -} - -inline ProcessQueue operator&(ProcessQueue lhs, ProcessQueue rhs) { - return static_cast(std::to_underlying(lhs) & - std::to_underlying(rhs)); -} - -struct TaskChain; -class GpuScheduler; - -Scheduler &getCpuScheduler(); -GpuScheduler &getGpuScheduler(ProcessQueue queue); - -struct GpuTaskLayout { - static constexpr auto kInvalidId = 0; //~static_cast(0); - - Ref chain; - std::uint64_t id; - std::uint64_t waitId = kInvalidId; - VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - - std::function invoke; - std::function submit; -}; - -struct TaskChain { - vk::Semaphore semaphore; - std::uint64_t nextTaskId = 1; - std::atomic refs{0}; - std::vector taskLocations; - - void incRef() { refs.fetch_add(1, std::memory_order::relaxed); } - void decRef() { - if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) { - delete this; - } - } - - static Ref Create() { - auto result = new TaskChain(); - result->semaphore = vk::Semaphore::Create(); - return result; - } - - std::uint64_t add(ProcessQueue queue, std::uint64_t waitId, - std::function invoke); - - std::uint64_t add(ProcessQueue queue, - std::function invoke) { - return add(queue, GpuTaskLayout::kInvalidId, std::move(invoke)); - } - - template - requires requires(T &&t) { - { t() } -> std::same_as; - } - std::uint64_t add(std::uint64_t waitId, T &&task) { - auto prevTaskId = getLastTaskId(); - auto id = nextTaskId++; - enum class State { - WaitTask, - PrevTask, - }; - auto cpuTask = createCpuTask([=, task = std::forward(task), - self = Ref(this), state = State::WaitTask]( - const AsyncTaskCtl &) mutable { - if (state == State::WaitTask) { - if (waitId != GpuTaskLayout::kInvalidId) { - if (self->semaphore.getCounterValue() < waitId) { - return TaskResult::Reschedule; - } - } - - auto result = task(); - - if (result != TaskResult::Complete) { - return result; - } - state = State::PrevTask; - } - - if (state == State::PrevTask) { - if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) { - if (self->semaphore.getCounterValue() < prevTaskId) { - return TaskResult::Reschedule; - } - } - - self->semaphore.signal(id); - } - - return TaskResult::Complete; - }); - getCpuScheduler().enqueue(std::move(cpuTask)); - return id; - } - - template - requires requires(T &&t) { - { t() } -> std::same_as; - } - std::uint64_t add(std::uint64_t waitId, T &&task) { - auto prevTaskId = getLastTaskId(); - auto id = nextTaskId++; - enum class State { - WaitTask, - PrevTask, - }; - auto cpuTask = createCpuTask([=, task = std::forward(task), - self = Ref(this), state = State::WaitTask]( - const AsyncTaskCtl &) mutable { - if (state == State::WaitTask) { - if (waitId != GpuTaskLayout::kInvalidId) { - if (self->semaphore.getCounterValue() < waitId) { - return TaskResult::Reschedule; - } - } - - task(); - state = State::PrevTask; - } - - if (state == State::PrevTask) { - if (prevTaskId != GpuTaskLayout::kInvalidId && waitId != prevTaskId) { - if (self->semaphore.getCounterValue() < prevTaskId) { - return TaskResult::Reschedule; - } - } - - self->semaphore.signal(id); - } - return TaskResult::Complete; - }); - getCpuScheduler().enqueue(std::move(cpuTask)); - return id; - } - - template - requires requires(T &&t) { - { t() } -> std::same_as; - } - std::uint64_t add(T &&task) { - return add(GpuTaskLayout::kInvalidId, std::forward(task)); - } - - template - requires requires(T &&t) { - { t() } -> std::same_as; - } - std::uint64_t add(T &&task) { - return add(GpuTaskLayout::kInvalidId, std::forward(task)); - } - - std::uint64_t getLastTaskId() const { return nextTaskId - 1; } - - std::uint64_t createExternalTask() { return nextTaskId++; } - void notifyExternalTaskComplete(std::uint64_t id) { semaphore.signal(id); } - - bool isComplete() const { return isComplete(getLastTaskId()); } - - bool isComplete(std::uint64_t task) const { - return semaphore.getCounterValue() >= task; - } - - bool empty() const { return getLastTaskId() == GpuTaskLayout::kInvalidId; } - - void wait(std::uint64_t task = GpuTaskLayout::kInvalidId) const { - if (empty()) { - return; - } - - if (task == GpuTaskLayout::kInvalidId) { - task = getLastTaskId(); - } - - Verify() << semaphore.wait(task, UINT64_MAX); - } -}; - -class GpuScheduler { - std::list workThreads; - std::deque tasks; - std::deque delayedTasks; - std::mutex taskMtx; - std::condition_variable taskCv; - std::atomic exit{false}; - std::string debugName; - -public: - explicit GpuScheduler(std::span> queues, - std::string debugName) - : debugName(debugName) { - for (std::size_t index = 0; auto [queue, queueFamilyIndex] : queues) { - workThreads.push_back(std::thread{[=, this] { - setThreadName( - ("GPU " + std::to_string(index) + " " + debugName).c_str()); - entry(queue, queueFamilyIndex); - }}); - - ++index; - } - } - - ~GpuScheduler() { - exit = true; - taskCv.notify_all(); - - for (auto &thread : workThreads) { - thread.join(); - } - } - - void enqueue(GpuTaskLayout &&task) { - std::lock_guard lock(taskMtx); - tasks.push_back(std::move(task)); - taskCv.notify_one(); - } - -private: - void submitTask(VkCommandPool pool, VkQueue queue, GpuTaskLayout &task) { - VkCommandBuffer cmdBuffer; - { - VkCommandBufferAllocateInfo allocateInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = pool, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = 1, - }; - - Verify() << vkAllocateCommandBuffers(vk::g_vkDevice, &allocateInfo, - &cmdBuffer); - - VkCommandBufferBeginInfo beginInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - vkBeginCommandBuffer(cmdBuffer, &beginInfo); - } - - task.invoke(cmdBuffer); - - vkEndCommandBuffer(cmdBuffer); - - if (task.submit) { - task.submit(queue, cmdBuffer); - return; - } - - VkSemaphoreSubmitInfo signalSemSubmitInfo = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = task.chain->semaphore.getHandle(), - .value = task.id, - .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, - }; - - VkSemaphoreSubmitInfo waitSemSubmitInfo = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = task.chain->semaphore.getHandle(), - .value = task.waitId, - .stageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, - }; - - VkCommandBufferSubmitInfo cmdBufferSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = cmdBuffer, - }; - - VkSubmitInfo2 submitInfo{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = - static_cast(task.waitId ? 1 : 0), - .pWaitSemaphoreInfos = &waitSemSubmitInfo, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &cmdBufferSubmitInfo, - .signalSemaphoreInfoCount = 1, - .pSignalSemaphoreInfos = &signalSemSubmitInfo, - }; - - Verify() << vkQueueSubmit2(queue, 1, &submitInfo, VK_NULL_HANDLE); - - // if (task.signalChain->semaphore.wait( - // task.id, std::chrono::duration_cast( - // std::chrono::seconds(10)) - // .count())) { - // util::unreachable("gpu operation takes too long time. wait id = %lu\n", - // task.waitId); - // } - } - - void entry(VkQueue queue, std::uint32_t queueFamilyIndex) { - VkCommandPool pool; - { - VkCommandPoolCreateInfo poolCreateInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .queueFamilyIndex = queueFamilyIndex}; - - Verify() << vkCreateCommandPool(vk::g_vkDevice, &poolCreateInfo, - vk::g_vkAllocator, &pool); - } - - while (!exit.load(std::memory_order::relaxed)) { - GpuTaskLayout task; - - { - std::unique_lock lock(taskMtx); - - while (tasks.empty()) { - if (tasks.empty() && delayedTasks.empty()) { - taskCv.wait(lock); - } - - if (tasks.empty()) { - std::swap(delayedTasks, tasks); - } - } - - task = std::move(tasks.front()); - tasks.pop_front(); - } - - if (task.waitId != GpuTaskLayout::kInvalidId && - !task.chain->isComplete(task.waitId)) { - std::unique_lock lock(taskMtx); - delayedTasks.push_front(std::move(task)); - taskCv.notify_one(); - continue; - } - - submitTask(pool, queue, task); - } - - vkDestroyCommandPool(vk::g_vkDevice, pool, vk::g_vkAllocator); - } -}; - -inline std::uint64_t -TaskChain::add(ProcessQueue queue, std::uint64_t waitId, - std::function invoke) { - VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - if (waitId == GpuTaskLayout::kInvalidId) { - waitId = getLastTaskId(); - waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - auto id = nextTaskId++; - - getGpuScheduler(queue).enqueue({ - .chain = Ref(this), - .id = id, - .waitId = waitId, - .waitStage = waitStage, - .invoke = std::move(invoke), - }); - - return id; -} - -GpuScheduler &getTransferQueueScheduler(); -GpuScheduler &getComputeQueueScheduler(); -GpuScheduler &getGraphicsQueueScheduler(); -} // namespace amdgpu::device diff --git a/hw/amdgpu/device/include/amdgpu/device/pm4.hpp b/hw/amdgpu/device/include/amdgpu/device/pm4.hpp deleted file mode 100644 index 6d9cf25..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/pm4.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#pragma once - -namespace amdgpu { -enum PM4Opcodes { - NOP = 0x10, - SET_BASE = 0x11, - CLEAR_STATE = 0x12, - INDEX_BUFFER_SIZE = 0x13, - DISPATCH_DIRECT = 0x15, - DISPATCH_INDIRECT = 0x16, - INDIRECT_BUFFER_END = 0x17, - MODE_CONTROL = 0x18, - ATOMIC_GDS = 0x1D, - ATOMIC_MEM = 0x1E, - OCCLUSION_QUERY = 0x1F, - SET_PREDICATION = 0x20, - REG_RMW = 0x21, - COND_EXEC = 0x22, - PRED_EXEC = 0x23, - DRAW_INDIRECT = 0x24, - DRAW_INDEX_INDIRECT = 0x25, - INDEX_BASE = 0x26, - DRAW_INDEX_2 = 0x27, - CONTEXT_CONTROL = 0x28, - DRAW_INDEX_OFFSET = 0x29, - INDEX_TYPE = 0x2A, - DRAW_INDEX = 0x2B, - DRAW_INDIRECT_MULTI = 0x2C, - DRAW_INDEX_AUTO = 0x2D, - DRAW_INDEX_IMMD = 0x2E, - NUM_INSTANCES = 0x2F, - DRAW_INDEX_MULTI_AUTO = 0x30, - INDIRECT_BUFFER_32 = 0x32, - INDIRECT_BUFFER_CONST = 0x33, - STRMOUT_BUFFER_UPDATE = 0x34, - DRAW_INDEX_OFFSET_2 = 0x35, - DRAW_PREAMBLE = 0x36, - WRITE_DATA = 0x37, - DRAW_INDEX_INDIRECT_MULTI = 0x38, - MEM_SEMAPHORE = 0x39, - MPEG_INDEX = 0x3A, - COPY_DW = 0x3B, - WAIT_REG_MEM = 0x3C, - MEM_WRITE = 0x3D, - INDIRECT_BUFFER_3F = 0x3F, - COPY_DATA = 0x40, - CP_DMA = 0x41, - PFP_SYNC_ME = 0x42, - SURFACE_SYNC = 0x43, - ME_INITIALIZE = 0x44, - COND_WRITE = 0x45, - EVENT_WRITE = 0x46, - EVENT_WRITE_EOP = 0x47, - EVENT_WRITE_EOS = 0x48, - RELEASE_MEM = 0x49, - PREAMBLE_CNTL = 0x4A, - RB_OFFSET = 0x4B, - ALU_PS_CONST_BUFFER_COPY = 0x4C, - ALU_VS_CONST_BUFFER_COPY = 0x4D, - ALU_PS_CONST_UPDATE = 0x4E, - ALU_VS_CONST_UPDATE = 0x4F, - DMA_DATA = 0x50, - ONE_REG_WRITE = 0x57, - AQUIRE_MEM = 0x58, - REWIND = 0x59, - LOAD_UCONFIG_REG = 0x5E, - LOAD_SH_REG = 0x5F, - LOAD_CONFIG_REG = 0x60, - LOAD_CONTEXT_REG = 0x61, - SET_CONFIG_REG = 0x68, - SET_CONTEXT_REG = 0x69, - SET_ALU_CONST = 0x6A, - SET_BOOL_CONST = 0x6B, - SET_LOOP_CONST = 0x6C, - SET_RESOURCE = 0x6D, - SET_SAMPLER = 0x6E, - SET_CTL_CONST = 0x6F, - SET_RESOURCE_OFFSET = 0x70, - SET_ALU_CONST_VS = 0x71, - SET_ALU_CONST_DI = 0x72, - SET_CONTEXT_REG_INDIRECT = 0x73, - SET_RESOURCE_INDIRECT = 0x74, - SET_APPEND_CNT = 0x75, - SET_SH_REG = 0x76, - SET_SH_REG_OFFSET = 0x77, - SET_QUEUE_REG = 0x78, - SET_UCONFIG_REG = 0x79, - SCRATCH_RAM_WRITE = 0x7D, - SCRATCH_RAM_READ = 0x7E, - LOAD_CONST_RAM = 0x80, - WRITE_CONST_RAM = 0x81, - DUMP_CONST_RAM = 0x83, - INCREMENT_CE_COUNTER = 0x84, - INCREMENT_DE_COUNTER = 0x85, - WAIT_ON_CE_COUNTER = 0x86, - WAIT_ON_DE_COUNTER_DIFF = 0x88, - SWITCH_BUFFER = 0x8B, -}; - -const char *pm4OpcodeToString(int opcode); -} // namespace amdgpu diff --git a/hw/amdgpu/device/include/amdgpu/device/scheduler.hpp b/hw/amdgpu/device/include/amdgpu/device/scheduler.hpp deleted file mode 100644 index 37fc97f..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/scheduler.hpp +++ /dev/null @@ -1,454 +0,0 @@ -#pragma once - -#include "util/unreachable.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace amdgpu::device { -inline void setThreadName(const char *name) { - pthread_setname_np(pthread_self(), name); -} - -template class Ref { - T *m_ref = nullptr; - -public: - Ref() = default; - Ref(std::nullptr_t) {} - - template - requires(std::is_base_of_v) - Ref(OT *ref) : m_ref(ref) { - if (m_ref != nullptr) { - ref->incRef(); - } - } - - template - requires(std::is_base_of_v) - Ref(const Ref &other) : m_ref(other.get()) { - if (m_ref != nullptr) { - m_ref->incRef(); - } - } - - template - requires(std::is_base_of_v) - Ref(Ref &&other) : m_ref(other.release()) {} - - Ref(const Ref &other) : m_ref(other.get()) { - if (m_ref != nullptr) { - m_ref->incRef(); - } - } - Ref(Ref &&other) : m_ref(other.release()) {} - - template - requires(std::is_base_of_v) - Ref &operator=(Ref &&other) { - other.swap(*this); - return *this; - } - - template - requires(std::is_base_of_v) - Ref &operator=(OT *other) { - *this = Ref(other); - return *this; - } - - template - requires(std::is_base_of_v) - Ref &operator=(const Ref &other) { - *this = Ref(other); - return *this; - } - - Ref &operator=(const Ref &other) { - *this = Ref(other); - return *this; - } - - Ref &operator=(Ref &&other) { - other.swap(*this); - return *this; - } - - ~Ref() { - if (m_ref != nullptr) { - m_ref->decRef(); - } - } - - void swap(Ref &other) { std::swap(m_ref, other.m_ref); } - T *get() const { return m_ref; } - T *release() { return std::exchange(m_ref, nullptr); } - T *operator->() const { return m_ref; } - explicit operator bool() const { return m_ref != nullptr; } - bool operator==(std::nullptr_t) const { return m_ref == nullptr; } - bool operator==(const Ref &other) const = default; - bool operator==(const T *other) const { return m_ref == other; } - auto operator<=>(const T *other) const { return m_ref <=> other; } - auto operator<=>(const Ref &other) const = default; -}; - -template Ref(T *) -> Ref; -template Ref(Ref) -> Ref; - -enum class TaskState { Created, InProgress, Complete, Canceled }; -enum class TaskResult { Complete, Canceled, Reschedule }; - -struct AsyncTaskCtl { - std::atomic refs{0}; - std::atomic stateStorage{TaskState::Created}; - std::atomic cancelRequested{false}; - - virtual ~AsyncTaskCtl() = default; - - void incRef() { refs.fetch_add(1, std::memory_order::relaxed); } - void decRef() { - if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) { - delete this; - } - } - - bool isCancelRequested() const { - return cancelRequested.load(std::memory_order::relaxed) == true; - } - bool isCanceled() const { return getState() == TaskState::Canceled; } - bool isComplete() const { return getState() == TaskState::Complete; } - bool isInProgress() const { return getState() == TaskState::InProgress; } - - TaskState getState() const { - return stateStorage.load(std::memory_order::relaxed); - } - - void cancel() { cancelRequested.store(true, std::memory_order::relaxed); } - - void wait() { - if (stateStorage.load(std::memory_order::relaxed) == TaskState::Created) { - util::unreachable("attempt to wait task that wasn't scheduled\n"); - } - stateStorage.wait(TaskState::InProgress, std::memory_order::relaxed); - } -}; - -struct CpuTaskCtl : AsyncTaskCtl { - virtual TaskResult invoke() = 0; -}; - -namespace detail { -template -concept LambdaWithoutClosure = requires(T t) { +t; }; -} - -template struct AsyncCpuTask; - -template - requires requires(T t, const AsyncTaskCtl &ctl) { - { t(ctl) } -> std::same_as; - requires detail::LambdaWithoutClosure; - } -struct AsyncCpuTask : CpuTaskCtl { - static constexpr TaskResult (*fn)(const AsyncTaskCtl &) = +std::declval(); - - AsyncCpuTask() = default; - AsyncCpuTask(T &&) {} - - TaskResult invoke() override { - auto &base = *static_cast(this); - - return fn(base); - } -}; - -template - requires requires(T t, const AsyncTaskCtl &ctl) { - { t(ctl) } -> std::same_as; - requires !detail::LambdaWithoutClosure; - } -struct AsyncCpuTask : CpuTaskCtl { - alignas(T) std::byte taskStorage[sizeof(T)]; - - AsyncCpuTask(T &&t) { new (taskStorage) T(std::forward(t)); } - ~AsyncCpuTask() { std::bit_cast(&taskStorage)->~T(); } - - TaskResult invoke() override { - auto &lambda = *std::bit_cast(&taskStorage); - auto &base = *static_cast(this); - return lambda(base); - } -}; - -template - requires requires(T t, const AsyncTaskCtl &ctl) { - { t(ctl) } -> std::same_as; - } -Ref createCpuTask(T &&task) { - return Ref(new AsyncCpuTask(std::forward(task))); -} - -template - requires requires(T t) { - { t() } -> std::same_as; - } -Ref createCpuTask(T &&task) { - return createCpuTask( - [task = std::forward(task)]( - const AsyncTaskCtl &) mutable -> TaskResult { return task(); }); -} - -template - requires requires(T t) { - { t() } -> std::same_as; - } -Ref createCpuTask(T &&task) { - return createCpuTask([task = std::forward(task)]( - const AsyncTaskCtl &ctl) mutable -> TaskResult { - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } - - task(); - return TaskResult::Complete; - }); -} - -template - requires requires(T t, const AsyncTaskCtl &ctl) { - { t(ctl) } -> std::same_as; - } -Ref createCpuTask(T &&task) { - return createCpuTask([task = std::forward(task)](const AsyncTaskCtl &ctl) { - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } - - task(ctl); - return TaskResult::Complete; - }); -} - -class Scheduler; - -class CpuTaskSet { - std::vector> tasks; - -public: - void append(Ref task) { tasks.push_back(std::move(task)); } - - void wait() { - for (auto task : tasks) { - task->wait(); - } - - tasks.clear(); - } - - void enqueue(Scheduler &scheduler); -}; - -class TaskSet { - struct TaskEntry { - Ref ctl; - std::function schedule; - }; - - std::vector tasks; - -public: - template - requires requires(Scheduler &sched, Ref task) { - sched.enqueue(std::move(task)); - task->wait(); - static_cast>(task); - } - void append(Scheduler &sched, Ref task) { - Ref rawTask = task; - auto schedFn = [sched = &sched, task = std::move(task)] { - sched->enqueue(std::move(task)); - }; - - tasks.push_back({ - .ctl = std::move(rawTask), - .schedule = std::move(schedFn), - }); - } - - void schedule() { - for (auto &task : tasks) { - if (auto schedule = std::exchange(task.schedule, nullptr)) { - schedule(); - } - } - } - - bool isCanceled() const { - for (auto &task : tasks) { - if (task.ctl->isCanceled()) { - return true; - } - } - - return false; - } - - bool isComplete() const { - for (auto &task : tasks) { - if (!task.ctl->isComplete()) { - return false; - } - } - - return true; - } - - bool isInProgress() const { - for (auto &task : tasks) { - if (task.ctl->isInProgress()) { - return true; - } - } - - return false; - } - - void clear() { tasks.clear(); } - - void wait() const { - for (auto &task : tasks) { - assert(task.schedule == nullptr); - task.ctl->wait(); - } - } - - void cancel() { - for (auto &task : tasks) { - task.ctl->cancel(); - } - } -}; - -class Scheduler { - std::vector workThreads; - std::vector> tasks; - std::vector> rescheduleTasks; - std::mutex taskMtx; - std::condition_variable taskCv; - std::atomic exit{false}; - -public: - explicit Scheduler(std::size_t threadCount) { - for (std::size_t i = 0; i < threadCount; ++i) { - workThreads.push_back(std::thread{[this, i] { - setThreadName(("CPU " + std::to_string(i)).c_str()); - entry(); - }}); - } - } - - ~Scheduler() { - exit = true; - taskCv.notify_all(); - - for (auto &thread : workThreads) { - thread.join(); - } - } - - void enqueue(Ref task) { - std::lock_guard lock(taskMtx); - TaskState prevState = TaskState::Created; - if (!task->stateStorage.compare_exchange_strong( - prevState, TaskState::InProgress, std::memory_order::relaxed)) { - util::unreachable("attempt to schedule cpu task in wrong state %u", - (unsigned)prevState); - } - tasks.push_back(std::move(task)); - taskCv.notify_one(); - } - - template - requires requires(T &&task) { createCpuTask(std::forward(task)); } - Ref enqueue(T &&task) { - auto taskHandle = createCpuTask(std::forward(task)); - enqueue(taskHandle); - return taskHandle; - } - - template - requires requires(T &&task) { createCpuTask(std::forward(task)); } - void enqueue(CpuTaskSet &set, T &&task) { - auto taskCtl = enqueue(std::forward(task)); - set.append(taskCtl); - } - -private: - Ref fetchTask() { - std::unique_lock lock(taskMtx); - - while (tasks.empty()) { - if (rescheduleTasks.empty() && tasks.empty()) { - taskCv.wait(lock); - } - - if (tasks.empty()) { - std::swap(rescheduleTasks, tasks); - } - } - - auto result = std::move(tasks.back()); - tasks.pop_back(); - return result; - } - - Ref invokeTask(Ref task) { - switch (task->invoke()) { - case TaskResult::Complete: - task->stateStorage.store(TaskState::Complete, std::memory_order::relaxed); - task->stateStorage.notify_all(); - return {}; - - case TaskResult::Canceled: - task->stateStorage.store(TaskState::Canceled, std::memory_order::relaxed); - task->stateStorage.notify_all(); - return {}; - - case TaskResult::Reschedule: - return task; - } - - std::abort(); - } - - void entry() { - while (!exit.load(std::memory_order::relaxed)) { - Ref task = fetchTask(); - - auto rescheduleTask = invokeTask(std::move(task)); - if (rescheduleTask == nullptr) { - continue; - } - - std::unique_lock lock(taskMtx); - rescheduleTasks.push_back(std::move(rescheduleTask)); - taskCv.notify_one(); - } - } -}; - -inline void CpuTaskSet::enqueue(Scheduler &scheduler) { - for (auto task : tasks) { - scheduler.enqueue(std::move(task)); - } -} -} // namespace amdgpu::device diff --git a/hw/amdgpu/device/include/amdgpu/device/tiler.hpp b/hw/amdgpu/device/include/amdgpu/device/tiler.hpp deleted file mode 100644 index 5afcdd8..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/tiler.hpp +++ /dev/null @@ -1,572 +0,0 @@ -#pragma once - -#include "util/unreachable.hpp" -#include -#include -#include - -namespace amdgpu::device { -enum TileMode { - kTileModeDepth_2dThin_64, - kTileModeDepth_2dThin_128, - kTileModeDepth_2dThin_256, - kTileModeDepth_2dThin_512, - kTileModeDepth_2dThin_1K, - kTileModeDepth_1dThin, - kTileModeDepth_2dThinPrt_256, - kTileModeDepth_2dThinPrt_1K, - - kTileModeDisplay_LinearAligned, - kTileModeDisplay_1dThin, - kTileModeDisplay_2dThin, - kTileModeDisplay_ThinPrt, - kTileModeDisplay_2dThinPrt, - - kTileModeThin_1dThin, - kTileModeThin_2dThin, - kTileModeThin_3dThin, - kTileModeThin_ThinPrt, - kTileModeThin_2dThinPrt, - kTileModeThin_3dThinPrt, - - kTileModeThick_1dThick, - kTileModeThick_2dThick, - kTileModeThick_3dThick, - kTileModeThick_ThickPrt, - kTileModeThick_2dThickPrt, - kTileModeThick_3dThickPrt, - kTileModeThick_2dXThick, - kTileModeThick_3dXThick, -}; - -enum MacroTileMode { - kMacroTileMode_1x4_16, - kMacroTileMode_1x2_16, - kMacroTileMode_1x1_16, - kMacroTileMode_1x1_16_dup, - kMacroTileMode_1x1_8, - kMacroTileMode_1x1_4, - kMacroTileMode_1x1_2, - kMacroTileMode_1x1_2_dup, - kMacroTileMode_1x8_16, - kMacroTileMode_1x4_16_dup, - kMacroTileMode_1x2_16_dup, - kMacroTileMode_1x1_16_dup2, - kMacroTileMode_1x1_8_dup, - kMacroTileMode_1x1_4_dup, - kMacroTileMode_1x1_2_dup2, - kMacroTileMode_1x1_2_dup3, -}; - -inline constexpr auto kMicroTileWidth = 8; -inline constexpr auto kMicroTileHeight = 8; - -inline uint64_t computeLinearElementByteOffset( - uint32_t x, uint32_t y, uint32_t z, uint32_t fragmentIndex, uint32_t pitch, - uint32_t slicePitchElems, uint32_t bitsPerElement, - uint32_t numFragmentsPerPixel) { - uint64_t absoluteElementIndex = z * slicePitchElems + y * pitch + x; - return (absoluteElementIndex * bitsPerElement * numFragmentsPerPixel) + - (bitsPerElement * fragmentIndex); -} - -inline uint32_t get1dThickElementIndex(uint32_t x, uint32_t y, uint32_t z, - uint32_t bpp) { - uint32_t elem = 0; - - switch (bpp) { - case 8: - case 16: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((y >> 0) & 0x1) << 1; - elem |= ((x >> 1) & 0x1) << 2; - elem |= ((y >> 1) & 0x1) << 3; - elem |= ((z >> 0) & 0x1) << 4; - elem |= ((z >> 1) & 0x1) << 5; - elem |= ((x >> 2) & 0x1) << 6; - elem |= ((y >> 2) & 0x1) << 7; - break; - case 32: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((y >> 0) & 0x1) << 1; - elem |= ((x >> 1) & 0x1) << 2; - elem |= ((z >> 0) & 0x1) << 3; - elem |= ((y >> 1) & 0x1) << 4; - elem |= ((z >> 1) & 0x1) << 5; - elem |= ((x >> 2) & 0x1) << 6; - elem |= ((y >> 2) & 0x1) << 7; - break; - - case 64: - case 128: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((y >> 0) & 0x1) << 1; - elem |= ((z >> 0) & 0x1) << 2; - elem |= ((x >> 1) & 0x1) << 3; - elem |= ((y >> 1) & 0x1) << 4; - elem |= ((z >> 1) & 0x1) << 5; - elem |= ((x >> 2) & 0x1) << 6; - elem |= ((y >> 2) & 0x1) << 7; - break; - - default: - util::unreachable(); - } - - return elem; -} - -inline uint32_t getThinElementIndex(uint32_t x, uint32_t y) { - uint32_t elem = 0; - - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((y >> 0) & 0x1) << 1; - elem |= ((x >> 1) & 0x1) << 2; - elem |= ((y >> 1) & 0x1) << 3; - elem |= ((x >> 2) & 0x1) << 4; - elem |= ((y >> 2) & 0x1) << 5; - - return elem; -} - -inline uint32_t getDisplayElementIndex(uint32_t x, uint32_t y, uint32_t bpp) { - uint32_t elem = 0; - switch (bpp) { - case 8: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((x >> 1) & 0x1) << 1; - elem |= ((x >> 2) & 0x1) << 2; - elem |= ((y >> 1) & 0x1) << 3; - elem |= ((y >> 0) & 0x1) << 4; - elem |= ((y >> 2) & 0x1) << 5; - break; - case 16: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((x >> 1) & 0x1) << 1; - elem |= ((x >> 2) & 0x1) << 2; - elem |= ((y >> 0) & 0x1) << 3; - elem |= ((y >> 1) & 0x1) << 4; - elem |= ((y >> 2) & 0x1) << 5; - break; - case 32: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((x >> 1) & 0x1) << 1; - elem |= ((y >> 0) & 0x1) << 2; - elem |= ((x >> 2) & 0x1) << 3; - elem |= ((y >> 1) & 0x1) << 4; - elem |= ((y >> 2) & 0x1) << 5; - break; - case 64: - elem |= ((x >> 0) & 0x1) << 0; - elem |= ((y >> 0) & 0x1) << 1; - elem |= ((x >> 1) & 0x1) << 2; - elem |= ((x >> 2) & 0x1) << 3; - elem |= ((y >> 1) & 0x1) << 4; - elem |= ((y >> 2) & 0x1) << 5; - break; - default: - std::abort(); - } - - return elem; -} -inline uint64_t computeThin1dThinTileElementOffset(std::uint32_t bpp, - uint32_t x, uint32_t y, - uint32_t z, - std::uint64_t height, - std::uint64_t pitch) { - uint64_t elementIndex = getThinElementIndex(x, y); - - auto tileBytes = kMicroTileWidth * kMicroTileHeight * bpp; - - auto paddedWidth = pitch; - - auto tilesPerRow = paddedWidth / kMicroTileWidth; - auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight), 1UL); - - uint64_t sliceOffset = z * tilesPerSlice * tileBytes; - - uint64_t tileRowIndex = y / kMicroTileHeight; - uint64_t tileColumnIndex = x / kMicroTileWidth; - uint64_t tileOffset = - (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes; - - return (sliceOffset + tileOffset) + elementIndex * bpp; -} -inline uint64_t computeThick1dThickTileElementOffset(std::uint32_t bpp, - uint32_t x, uint32_t y, - uint32_t z, - std::uint64_t height, - std::uint64_t pitch) { - uint64_t elementIndex = get1dThickElementIndex(x, y, z, bpp * 8); - - auto tileBytes = (kMicroTileWidth * kMicroTileHeight * bpp * 8 * 4 + 7) / 8; - - auto paddedWidth = pitch; - - auto tilesPerRow = paddedWidth / kMicroTileWidth; - auto tilesPerSlice = std::max(tilesPerRow * (height / kMicroTileHeight), 1UL); - - uint64_t sliceOffset = (z / 4) * tilesPerSlice * tileBytes; - - uint64_t tileRowIndex = y / kMicroTileHeight; - uint64_t tileColumnIndex = x / kMicroTileWidth; - uint64_t tileOffset = - (tileRowIndex * tilesPerRow + tileColumnIndex) * tileBytes; - - return (sliceOffset + tileOffset) + elementIndex * bpp; -} - -static constexpr auto kPipeInterleaveBytes = 256; - -inline void getMacroTileData(MacroTileMode macroTileMode, uint32_t &bankWidth, - uint32_t &bankHeight, uint32_t ¯oTileAspect, - uint32_t &numBanks) { - switch (macroTileMode) { - case kMacroTileMode_1x4_16: - bankWidth = 1; - bankHeight = 4; - macroTileAspect = 4; - numBanks = 16; - break; - case kMacroTileMode_1x2_16: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 2; - numBanks = 16; - break; - case kMacroTileMode_1x1_16: - bankWidth = 1; - bankHeight = 2; - macroTileAspect = 2; - numBanks = 16; - break; - case kMacroTileMode_1x1_16_dup: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 2; - numBanks = 16; - break; - case kMacroTileMode_1x1_8: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 8; - break; - case kMacroTileMode_1x1_4: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 4; - break; - case kMacroTileMode_1x1_2: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 2; - break; - case kMacroTileMode_1x1_2_dup: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 2; - break; - case kMacroTileMode_1x8_16: - bankWidth = 1; - bankHeight = 8; - macroTileAspect = 4; - numBanks = 16; - break; - case kMacroTileMode_1x4_16_dup: - bankWidth = 1; - bankHeight = 4; - macroTileAspect = 4; - numBanks = 16; - break; - case kMacroTileMode_1x2_16_dup: - bankWidth = 1; - bankHeight = 2; - macroTileAspect = 2; - numBanks = 16; - break; - case kMacroTileMode_1x1_16_dup2: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 2; - numBanks = 16; - break; - case kMacroTileMode_1x1_8_dup: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 8; - break; - case kMacroTileMode_1x1_4_dup: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 4; - break; - case kMacroTileMode_1x1_2_dup2: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 2; - break; - case kMacroTileMode_1x1_2_dup3: - bankWidth = 1; - bankHeight = 1; - macroTileAspect = 1; - numBanks = 2; - break; - default: - util::unreachable(); - } -} - -static constexpr uint32_t log2(uint32_t i) { return 31 - __builtin_clz(i | 1); } - -inline constexpr uint32_t kDramRowSize = 0x400; - -inline constexpr uint32_t getPipeP8_32x32_8x16Index(uint32_t x, uint32_t y) { - std::uint32_t pipe = 0; - pipe |= (((x >> 4) ^ (y >> 3) ^ (x >> 5)) & 0x1) << 0; - pipe |= (((x >> 3) ^ (y >> 4)) & 0x1) << 1; - pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; - return pipe; -} - -inline constexpr uint32_t getPipeP8_32x32_16x16Index(uint32_t x, uint32_t y) { - std::uint32_t pipe = 0; - pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0; - pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1; - pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; - return pipe; -} - -inline constexpr uint32_t getPipeP16Index(uint32_t x, uint32_t y) { - std::uint32_t pipe = 0; - pipe |= (((x >> 3) ^ (y >> 3) ^ (x >> 4)) & 0x1) << 0; - pipe |= (((x >> 4) ^ (y >> 4)) & 0x1) << 1; - pipe |= (((x >> 5) ^ (y >> 5)) & 0x1) << 2; - pipe |= (((x >> 6) ^ (y >> 5)) & 0x1) << 3; - return pipe; -} - -inline constexpr uint32_t getBankIndex(uint32_t x, uint32_t y, - uint32_t bankWidth, uint32_t bankHeight, - uint32_t numBanks, uint32_t numPipes) { - const uint32_t xShiftOffset = log2(bankWidth * numPipes); - const uint32_t yShiftOffset = log2(bankHeight); - const uint32_t xs = x >> xShiftOffset; - const uint32_t ys = y >> yShiftOffset; - - uint32_t bank = 0; - switch (numBanks) { - case 2: - bank |= (((xs >> 3) ^ (ys >> 3)) & 0x1) << 0; - break; - case 4: - bank |= (((xs >> 3) ^ (ys >> 4)) & 0x1) << 0; - bank |= (((xs >> 4) ^ (ys >> 3)) & 0x1) << 1; - break; - case 8: - bank |= (((xs >> 3) ^ (ys >> 5)) & 0x1) << 0; - bank |= (((xs >> 4) ^ (ys >> 4) ^ (ys >> 5)) & 0x1) << 1; - bank |= (((xs >> 5) ^ (ys >> 3)) & 0x1) << 2; - break; - case 16: - bank |= (((xs >> 3) ^ (ys >> 6)) & 0x1) << 0; - bank |= (((xs >> 4) ^ (ys >> 5) ^ (ys >> 6)) & 0x1) << 1; - bank |= (((xs >> 5) ^ (ys >> 4)) & 0x1) << 2; - bank |= (((xs >> 6) ^ (ys >> 3)) & 0x1) << 3; - break; - default: - util::unreachable(); - } - - return bank; -} - -inline uint64_t compute2dThinTileElementOffset( - std::uint32_t bpp, MacroTileMode macroTileMode, uint64_t elementIndex, - std::uint8_t tileSwizzleMask, std::uint32_t fragmentIndex, - std::uint32_t arraySlice, uint32_t x, uint32_t y, uint32_t z, - std::uint64_t height, std::uint64_t pitch) { - // P8_32x32_8x16 - constexpr auto numPipes = 8; - constexpr auto pipeInterleaveBytes = 256; - - std::uint32_t bankWidth; - std::uint32_t bankHeight; - std::uint32_t macroTileAspect; - std::uint32_t numBanks; - - getMacroTileData(macroTileMode, bankWidth, bankHeight, macroTileAspect, - numBanks); - - uint32_t tileBytes1x = (bpp * kMicroTileWidth * kMicroTileHeight + 7) / 8; - constexpr auto sampleSplit = 1 << 2; - auto tileSplitC = std::max(256, tileBytes1x * sampleSplit); - auto tileSplitBytes = std::min(kDramRowSize, tileSplitC); - std::uint32_t numFragmentsPerPixel = 1; // TODO - - constexpr auto pipeInterleaveBits = log2(pipeInterleaveBytes); - constexpr auto pipeInterleaveMask = (1 << (pipeInterleaveBits)) - 1; - constexpr auto pipeBits = log2(numPipes); - auto bankBits = log2(numBanks); - auto bankSwizzleMask = tileSwizzleMask; - constexpr auto pipeSwizzleMask = 0; - auto macroTileWidth = - (kMicroTileWidth * bankWidth * numPipes) * macroTileAspect; - auto macroTileHeight = - (kMicroTileHeight * bankHeight * numBanks) / macroTileAspect; - - uint64_t pipe = getPipeP8_32x32_8x16Index(x, y); - uint64_t bank = getBankIndex(x, y, bankWidth, bankHeight, numBanks, numPipes); - - uint32_t tileBytes = - (kMicroTileWidth * kMicroTileHeight * bpp * numFragmentsPerPixel + 7) / 8; - - uint64_t fragmentOffset = - fragmentIndex * (tileBytes / numFragmentsPerPixel) * 8; - uint64_t elementOffset = fragmentOffset + (elementIndex * bpp); - - uint64_t slicesPerTile = 1; - uint64_t tileSplitSlice = 0; - if (tileBytes > tileSplitBytes) { - slicesPerTile = tileBytes / tileSplitBytes; - tileSplitSlice = elementOffset / (tileSplitBytes * 8); - elementOffset %= (tileSplitBytes * 8); - tileBytes = tileSplitBytes; - } - - uint64_t macroTileBytes = (macroTileWidth / kMicroTileWidth) * - (macroTileHeight / kMicroTileHeight) * tileBytes / - (numPipes * numBanks); - uint64_t macroTilesPerRow = pitch / macroTileWidth; - uint64_t macroTileRowIndex = y / macroTileHeight; - uint64_t macroTileColumnIndex = x / macroTileWidth; - uint64_t macroTileIndex = - (macroTileRowIndex * macroTilesPerRow) + macroTileColumnIndex; - uint64_t macroTileOffset = macroTileIndex * macroTileBytes; - uint64_t macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); - uint64_t sliceBytes = macroTilesPerSlice * macroTileBytes; - uint32_t slice = z; - uint64_t sliceOffset = (tileSplitSlice + slicesPerTile * slice) * sliceBytes; - if (arraySlice != 0) { - slice = arraySlice; - } - - uint64_t tileRowIndex = (y / kMicroTileHeight) % bankHeight; - uint64_t tileColumnIndex = ((x / kMicroTileWidth) / numPipes) % bankWidth; - uint64_t tileIndex = (tileRowIndex * bankWidth) + tileColumnIndex; - uint64_t tileOffset = tileIndex * tileBytes; - - uint64_t bankSwizzle = bankSwizzleMask; - uint64_t pipeSwizzle = pipeSwizzleMask; - - uint64_t pipe_slice_rotation = 0; - pipeSwizzle += pipe_slice_rotation; - pipeSwizzle &= (numPipes - 1); - pipe = pipe ^ pipeSwizzle; - - uint32_t sliceRotation = ((numBanks / 2) - 1) * slice; - uint64_t tileSplitSliceRotation = ((numBanks / 2) + 1) * tileSplitSlice; - - bank ^= bankSwizzle + sliceRotation; - bank ^= tileSplitSliceRotation; - bank &= (numBanks - 1); - - uint64_t totalOffset = - (sliceOffset + macroTileOffset + tileOffset) * 8 + elementOffset; - uint64_t bitOffset = totalOffset & 0x7; - totalOffset /= 8; - - uint64_t pipeInterleaveOffset = totalOffset & pipeInterleaveMask; - uint64_t offset = totalOffset >> pipeInterleaveBits; - - uint64_t byteOffset = pipeInterleaveOffset | (pipe << (pipeInterleaveBits)) | - (bank << (pipeInterleaveBits + pipeBits)) | - (offset << (pipeInterleaveBits + pipeBits + bankBits)); - - return (byteOffset << 3) | bitOffset; -} - -inline uint64_t computeTiledElementByteOffset( - TileMode tileMode, std::uint32_t bpp, uint32_t x, uint32_t y, uint32_t z, - MacroTileMode macroTileMode, std::uint8_t tileSwizzleMask, - std::uint32_t fragmentIndex, std::uint32_t mipLevel, - std::uint32_t arraySlice, uint64_t width, std::uint64_t height, - std::uint64_t depth, std::uint64_t pitch, std::uint64_t depthPitch) { - switch (tileMode) { - case kTileModeDepth_2dThin_64: - util::unreachable(); - case kTileModeDepth_2dThin_128: - util::unreachable(); - case kTileModeDepth_2dThin_256: - util::unreachable(); - case kTileModeDepth_2dThin_512: - util::unreachable(); - case kTileModeDepth_2dThin_1K: - util::unreachable(); - case kTileModeDepth_1dThin: - util::unreachable(); - case kTileModeDepth_2dThinPrt_256: - util::unreachable(); - case kTileModeDepth_2dThinPrt_1K: - util::unreachable(); - - case kTileModeDisplay_LinearAligned: - return x * y * z * ((bpp + 7) / 8); - - case kTileModeDisplay_1dThin: - util::unreachable(); - case kTileModeDisplay_2dThin: - return compute2dThinTileElementOffset(bpp, macroTileMode, - getDisplayElementIndex(x, y, bpp), - tileSwizzleMask, fragmentIndex, - arraySlice, x, y, z, height, pitch) / - 8; - case kTileModeDisplay_ThinPrt: - util::unreachable(); - case kTileModeDisplay_2dThinPrt: - util::unreachable(); - case kTileModeThin_1dThin: - return computeThin1dThinTileElementOffset(((bpp + 7) / 8), x, y, z, height, - pitch); - case kTileModeThin_2dThin: - return compute2dThinTileElementOffset( - bpp, macroTileMode, getThinElementIndex(x, y), tileSwizzleMask, - fragmentIndex, arraySlice, x, y, z, height, pitch) / - 8; - case kTileModeThin_3dThin: - util::unreachable(); - case kTileModeThin_ThinPrt: - util::unreachable(); - case kTileModeThin_2dThinPrt: - util::unreachable(); - case kTileModeThin_3dThinPrt: - util::unreachable(); - case kTileModeThick_1dThick: - return computeThick1dThickTileElementOffset(((bpp + 7) / 8), x, y, z, - height, pitch); - case kTileModeThick_2dThick: - util::unreachable(); - case kTileModeThick_3dThick: - util::unreachable(); - case kTileModeThick_ThickPrt: - util::unreachable(); - case kTileModeThick_2dThickPrt: - util::unreachable(); - case kTileModeThick_3dThickPrt: - util::unreachable(); - case kTileModeThick_2dXThick: - util::unreachable(); - case kTileModeThick_3dXThick: - util::unreachable(); - } - - util::unreachable(); -} -} // namespace amdgpu::device diff --git a/hw/amdgpu/device/include/amdgpu/device/vk.hpp b/hw/amdgpu/device/include/amdgpu/device/vk.hpp deleted file mode 100644 index 0de04cf..0000000 --- a/hw/amdgpu/device/include/amdgpu/device/vk.hpp +++ /dev/null @@ -1,985 +0,0 @@ -#pragma once - -#include "tiler.hpp" -#include "util/VerifyVulkan.hpp" -#include "util/area.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace amdgpu::device::vk { -extern VkDevice g_vkDevice; -extern VkAllocationCallbacks *g_vkAllocator; -extern std::vector> g_computeQueues; -extern std::vector> g_graphicsQueues; - -std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits, - VkMemoryPropertyFlags properties); - -class DeviceMemory { - VkDeviceMemory mDeviceMemory = VK_NULL_HANDLE; - VkDeviceSize mSize = 0; - unsigned mMemoryTypeIndex = 0; - -public: - DeviceMemory(DeviceMemory &) = delete; - DeviceMemory(DeviceMemory &&other) { *this = std::move(other); } - DeviceMemory() = default; - - ~DeviceMemory() { - if (mDeviceMemory != nullptr) { - vkFreeMemory(g_vkDevice, mDeviceMemory, g_vkAllocator); - } - } - - DeviceMemory &operator=(DeviceMemory &&other) { - std::swap(mDeviceMemory, other.mDeviceMemory); - std::swap(mSize, other.mSize); - std::swap(mMemoryTypeIndex, other.mMemoryTypeIndex); - return *this; - } - - VkDeviceMemory getHandle() const { return mDeviceMemory; } - VkDeviceSize getSize() const { return mSize; } - unsigned getMemoryTypeIndex() const { return mMemoryTypeIndex; } - - static DeviceMemory AllocateFromType(std::size_t size, - unsigned memoryTypeIndex) { - VkMemoryAllocateInfo allocInfo{}; - allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocInfo.allocationSize = size; - allocInfo.memoryTypeIndex = memoryTypeIndex; - - DeviceMemory result; - Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, - &result.mDeviceMemory); - result.mSize = size; - result.mMemoryTypeIndex = memoryTypeIndex; - return result; - } - - static DeviceMemory Allocate(std::size_t size, unsigned memoryTypeBits, - VkMemoryPropertyFlags properties) { - return AllocateFromType( - size, findPhysicalMemoryTypeIndex(memoryTypeBits, properties)); - } - - static DeviceMemory Allocate(VkMemoryRequirements requirements, - VkMemoryPropertyFlags properties) { - return AllocateFromType( - requirements.size, - findPhysicalMemoryTypeIndex(requirements.memoryTypeBits, properties)); - } - - static DeviceMemory CreateExternalFd(int fd, std::size_t size, - unsigned memoryTypeIndex) { - VkImportMemoryFdInfoKHR importMemoryInfo{ - VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - nullptr, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, - fd, - }; - - VkMemoryAllocateInfo allocInfo{ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &importMemoryInfo, - .allocationSize = size, - .memoryTypeIndex = memoryTypeIndex, - }; - - DeviceMemory result; - Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, - &result.mDeviceMemory); - result.mSize = size; - result.mMemoryTypeIndex = memoryTypeIndex; - return result; - } - static DeviceMemory - CreateExternalHostMemory(void *hostPointer, std::size_t size, - VkMemoryPropertyFlags properties) { - VkMemoryHostPointerPropertiesEXT hostPointerProperties = { - .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT}; - - auto vkGetMemoryHostPointerPropertiesEXT = - (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr( - g_vkDevice, "vkGetMemoryHostPointerPropertiesEXT"); - - Verify() << vkGetMemoryHostPointerPropertiesEXT( - g_vkDevice, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - hostPointer, &hostPointerProperties); - - auto memoryTypeBits = hostPointerProperties.memoryTypeBits; - - VkImportMemoryHostPointerInfoEXT importMemoryInfo = { - VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, - nullptr, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - hostPointer, - }; - - auto memoryTypeIndex = - findPhysicalMemoryTypeIndex(memoryTypeBits, properties); - - VkMemoryAllocateInfo allocInfo{ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &importMemoryInfo, - .allocationSize = size, - .memoryTypeIndex = memoryTypeIndex, - }; - - DeviceMemory result; - Verify() << vkAllocateMemory(g_vkDevice, &allocInfo, g_vkAllocator, - &result.mDeviceMemory); - result.mSize = size; - result.mMemoryTypeIndex = memoryTypeIndex; - return result; - } - - void *map(VkDeviceSize offset, VkDeviceSize size) { - void *result = 0; - Verify() << vkMapMemory(g_vkDevice, mDeviceMemory, offset, size, 0, - &result); - - return result; - } - - void unmap() { vkUnmapMemory(g_vkDevice, mDeviceMemory); } -}; - -struct DeviceMemoryRef { - VkDeviceMemory deviceMemory = VK_NULL_HANDLE; - VkDeviceSize offset = 0; - VkDeviceSize size = 0; - void *data = nullptr; - void *allocator = nullptr; - - void (*release)(DeviceMemoryRef &memoryRef) = nullptr; -}; - -class MemoryResource { - DeviceMemory mMemory; - char *mData = nullptr; - util::MemoryAreaTable<> table; - const char *debugName = ""; - - std::mutex mMtx; - -public: - MemoryResource() = default; - ~MemoryResource() { - if (mMemory.getHandle() != nullptr && mData != nullptr) { - vkUnmapMemory(g_vkDevice, mMemory.getHandle()); - } - } - - void initFromHost(void *data, std::size_t size) { - assert(mMemory.getHandle() == nullptr); - auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - mMemory = DeviceMemory::CreateExternalHostMemory(data, size, properties); - table.map(0, size); - debugName = "direct"; - } - - void initHostVisible(std::size_t size) { - assert(mMemory.getHandle() == nullptr); - auto properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - auto memory = DeviceMemory::Allocate(size, ~0, properties); - - void *data = nullptr; - Verify() << vkMapMemory(g_vkDevice, memory.getHandle(), 0, size, 0, &data); - - mMemory = std::move(memory); - table.map(0, size); - mData = reinterpret_cast(data); - debugName = "host"; - } - - void initDeviceLocal(std::size_t size) { - assert(mMemory.getHandle() == nullptr); - auto properties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - - mMemory = DeviceMemory::Allocate(size, ~0, properties); - table.map(0, size); - debugName = "local"; - } - - DeviceMemoryRef allocate(VkMemoryRequirements requirements) { - if ((requirements.memoryTypeBits & (1 << mMemory.getMemoryTypeIndex())) == - 0) { - util::unreachable(); - } - - std::lock_guard lock(mMtx); - - for (auto elem : table) { - auto offset = (elem.beginAddress + requirements.alignment - 1) & - ~(requirements.alignment - 1); - - if (offset >= elem.endAddress) { - continue; - } - - auto blockSize = elem.endAddress - offset; - - if (blockSize < requirements.size) { - continue; - } - - if (debugName == std::string_view{"local"}) { - std::printf("memory: allocation %s memory %lx-%lx\n", debugName, offset, - offset + requirements.size); - } - - table.unmap(offset, offset + requirements.size); - return {mMemory.getHandle(), - offset, - requirements.size, - mData, - this, - [](DeviceMemoryRef &memoryRef) { - auto self = - reinterpret_cast(memoryRef.allocator); - self->deallocate(memoryRef); - }}; - } - - util::unreachable("out of memory resource"); - } - - void deallocate(DeviceMemoryRef memory) { - std::lock_guard lock(mMtx); - table.map(memory.offset, memory.offset + memory.size); - std::printf("memory: free %s memory %lx-%lx\n", debugName, memory.offset, - memory.offset + memory.size); - } - - void dump() { - std::lock_guard lock(mMtx); - - for (auto elem : table) { - std::fprintf(stderr, "%zu - %zu\n", elem.beginAddress, elem.endAddress); - } - } - - DeviceMemoryRef getFromOffset(std::uint64_t offset, std::size_t size) { - return {mMemory.getHandle(), offset, size, nullptr, nullptr, nullptr}; - } - - explicit operator bool() const { return mMemory.getHandle() != nullptr; } -}; - -struct Semaphore { - VkSemaphore mSemaphore = VK_NULL_HANDLE; - -public: - Semaphore(const Semaphore &) = delete; - - Semaphore() = default; - Semaphore(Semaphore &&other) { *this = std::move(other); } - - Semaphore &operator=(Semaphore &&other) { - std::swap(mSemaphore, other.mSemaphore); - return *this; - } - - ~Semaphore() { - if (mSemaphore != VK_NULL_HANDLE) { - vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr); - } - } - - static Semaphore Create(std::uint64_t initialValue = 0) { - VkSemaphoreTypeCreateInfo typeCreateInfo = { - VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, - VK_SEMAPHORE_TYPE_TIMELINE, initialValue}; - - VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - &typeCreateInfo, 0}; - - Semaphore result; - Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr, - &result.mSemaphore); - return result; - } - - VkResult wait(std::uint64_t value, uint64_t timeout) const { - VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - nullptr, - VK_SEMAPHORE_WAIT_ANY_BIT, - 1, - &mSemaphore, - &value}; - - return vkWaitSemaphores(g_vkDevice, &waitInfo, timeout); - } - - void signal(std::uint64_t value) { - VkSemaphoreSignalInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, - nullptr, mSemaphore, value}; - - Verify() << vkSignalSemaphore(g_vkDevice, &signalInfo); - } - - std::uint64_t getCounterValue() const { - std::uint64_t result = 0; - Verify() << vkGetSemaphoreCounterValue(g_vkDevice, mSemaphore, &result); - return result; - } - - VkSemaphore getHandle() const { return mSemaphore; } - - bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; } - bool operator!=(std::nullptr_t) const { return mSemaphore != nullptr; } -}; - -struct BinSemaphore { - VkSemaphore mSemaphore = VK_NULL_HANDLE; - -public: - BinSemaphore(const BinSemaphore &) = delete; - - BinSemaphore() = default; - BinSemaphore(BinSemaphore &&other) { *this = std::move(other); } - - BinSemaphore &operator=(BinSemaphore &&other) { - std::swap(mSemaphore, other.mSemaphore); - return *this; - } - - ~BinSemaphore() { - if (mSemaphore != VK_NULL_HANDLE) { - vkDestroySemaphore(g_vkDevice, mSemaphore, nullptr); - } - } - - static BinSemaphore Create() { - VkSemaphoreTypeCreateInfo typeCreateInfo = { - VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, - VK_SEMAPHORE_TYPE_BINARY, 0}; - - VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - &typeCreateInfo, 0}; - - BinSemaphore result; - Verify() << vkCreateSemaphore(g_vkDevice, &createInfo, nullptr, - &result.mSemaphore); - return result; - } - - VkSemaphore getHandle() const { return mSemaphore; } - - bool operator==(std::nullptr_t) const { return mSemaphore == nullptr; } -}; - -struct Fence { - VkFence mFence = VK_NULL_HANDLE; - -public: - Fence(const Fence &) = delete; - - Fence() = default; - Fence(Fence &&other) { *this = std::move(other); } - - Fence &operator=(Fence &&other) { - std::swap(mFence, other.mFence); - return *this; - } - - ~Fence() { - if (mFence != VK_NULL_HANDLE) { - vkDestroyFence(g_vkDevice, mFence, nullptr); - } - } - - static Fence Create() { - VkFenceCreateInfo fenceCreateInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - nullptr, 0}; - Fence result; - Verify() << vkCreateFence(g_vkDevice, &fenceCreateInfo, nullptr, - &result.mFence); - return result; - } - - void wait() const { - Verify() << vkWaitForFences(g_vkDevice, 1, &mFence, 1, UINT64_MAX); - } - - bool isComplete() const { - return vkGetFenceStatus(g_vkDevice, mFence) == VK_SUCCESS; - } - - void reset() { vkResetFences(g_vkDevice, 1, &mFence); } - - VkFence getHandle() const { return mFence; } - - bool operator==(std::nullptr_t) const { return mFence == nullptr; } -}; - -struct CommandBuffer { - VkCommandBuffer mCmdBuffer = VK_NULL_HANDLE; - -public: - CommandBuffer(const CommandBuffer &) = delete; - - CommandBuffer() = default; - CommandBuffer(CommandBuffer &&other) { *this = std::move(other); } - - CommandBuffer &operator=(CommandBuffer &&other) { - std::swap(mCmdBuffer, other.mCmdBuffer); - return *this; - } - - CommandBuffer(VkCommandPool commandPool, - VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - VkCommandBufferUsageFlagBits flags = {}) { - VkCommandBufferAllocateInfo allocInfo{}; - allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - allocInfo.level = level; - allocInfo.commandPool = commandPool; - allocInfo.commandBufferCount = 1; - - VkCommandBuffer commandBuffer; - vkAllocateCommandBuffers(g_vkDevice, &allocInfo, &commandBuffer); - - VkCommandBufferBeginInfo beginInfo{}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - beginInfo.flags = flags; - - vkBeginCommandBuffer(commandBuffer, &beginInfo); - } - - void end() { vkEndCommandBuffer(mCmdBuffer); } - - bool operator==(std::nullptr_t) const { return mCmdBuffer == nullptr; } - bool operator!=(std::nullptr_t) const { return mCmdBuffer != nullptr; } -}; - -class Buffer { - VkBuffer mBuffer = VK_NULL_HANDLE; - DeviceMemoryRef mMemory; - -public: - Buffer(const Buffer &) = delete; - - Buffer() = default; - Buffer(Buffer &&other) { *this = std::move(other); } - ~Buffer() { - if (mBuffer != nullptr) { - vkDestroyBuffer(g_vkDevice, mBuffer, g_vkAllocator); - - if (mMemory.release != nullptr) { - mMemory.release(mMemory); - } - } - } - - Buffer &operator=(Buffer &&other) { - std::swap(mBuffer, other.mBuffer); - std::swap(mMemory, other.mMemory); - return *this; - } - - Buffer(std::size_t size, VkBufferUsageFlags usage, - VkBufferCreateFlags flags = 0, - VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, - std::span queueFamilyIndices = {}) { - VkBufferCreateInfo bufferInfo{}; - bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - bufferInfo.flags = flags; - bufferInfo.size = size; - bufferInfo.usage = usage; - bufferInfo.sharingMode = sharingMode; - bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size(); - bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data(); - - Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator, - &mBuffer); - } - - void *getData() const { - return reinterpret_cast(mMemory.data) + mMemory.offset; - } - - static Buffer - CreateExternal(std::size_t size, VkBufferUsageFlags usage, - VkBufferCreateFlags flags = 0, - VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, - std::span queueFamilyIndices = {}) { - VkExternalMemoryBufferCreateInfo info{ - VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT}; - - VkBufferCreateInfo bufferInfo{}; - bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - bufferInfo.pNext = &info; - bufferInfo.flags = flags; - bufferInfo.size = size; - bufferInfo.usage = usage; - bufferInfo.sharingMode = sharingMode; - bufferInfo.queueFamilyIndexCount = queueFamilyIndices.size(); - bufferInfo.pQueueFamilyIndices = queueFamilyIndices.data(); - - Buffer result; - - Verify() << vkCreateBuffer(g_vkDevice, &bufferInfo, g_vkAllocator, - &result.mBuffer); - - return result; - } - - static Buffer - Allocate(MemoryResource &pool, std::size_t size, VkBufferUsageFlags usage, - VkBufferCreateFlags flags = 0, - VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, - std::span queueFamilyIndices = {}) { - Buffer result(size, usage, flags, sharingMode, queueFamilyIndices); - result.allocateAndBind(pool); - - return result; - } - - VkBuffer getHandle() const { return mBuffer; } - [[nodiscard]] VkBuffer release() { return std::exchange(mBuffer, nullptr); } - - VkMemoryRequirements getMemoryRequirements() const { - VkMemoryRequirements requirements{}; - vkGetBufferMemoryRequirements(g_vkDevice, mBuffer, &requirements); - return requirements; - } - - void allocateAndBind(MemoryResource &pool) { - auto memory = pool.allocate(getMemoryRequirements()); - bindMemory(memory); - } - - void bindMemory(DeviceMemoryRef memory) { - Verify() << vkBindBufferMemory(g_vkDevice, mBuffer, memory.deviceMemory, - memory.offset); - mMemory = memory; - } - - void copyTo(VkCommandBuffer cmdBuffer, VkBuffer dstBuffer, - std::span regions) { - vkCmdCopyBuffer(cmdBuffer, mBuffer, dstBuffer, regions.size(), - regions.data()); - - VkDependencyInfo depInfo = {.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO}; - vkCmdPipelineBarrier2(cmdBuffer, &depInfo); - } - - void readFromImage(const void *address, std::uint32_t pixelSize, - TileMode tileMode, uint32_t width, uint32_t height, - uint32_t depth, uint32_t pitch) { - if (address == nullptr || tileMode == 0 || getData() == nullptr) { - return; - } - - if (tileMode == kTileModeDisplay_LinearAligned) { - // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode); - if (pitch == width) { - auto imageSize = width * height * depth * pixelSize; - std::memcpy(getData(), address, imageSize); - return; - } - - auto src = reinterpret_cast(address); - auto dst = reinterpret_cast(getData()); - - for (std::uint32_t y = 0; y < height; ++y) { - std::memcpy(dst + y * width * pixelSize, src + y * pitch * pixelSize, - width * pixelSize); - } - - return; - } - - auto src = reinterpret_cast(address); - auto dst = reinterpret_cast(getData()); - - for (uint32_t y = 0; y < height; ++y) { - auto linearOffset = - computeLinearElementByteOffset(0, y, 0, 0, pitch, 1, pixelSize, 1); - - for (std::uint32_t x = 0; x + 1 < width; x += 2) { - auto tiledOffset = computeTiledElementByteOffset( - tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0, - width, height, 1, pitch, 1); - - std::memcpy(dst + linearOffset, src + tiledOffset, pixelSize * 2); - linearOffset += pixelSize * 2; - } - } - } - - void writeAsImageTo(void *address, std::uint32_t pixelSize, TileMode tileMode, - uint32_t width, uint32_t height, uint32_t depth, - uint32_t pitch) { - if (address == nullptr || tileMode == 0) { - return; - } - - if (tileMode == kTileModeDisplay_LinearAligned) { - // std::fprintf(stderr, "Unsupported tile mode %x\n", tileMode); - if (pitch == width) { - auto bufferSize = width * height * depth * pixelSize; - std::memcpy(address, getData(), bufferSize); - return; - } - - auto src = reinterpret_cast(getData()); - auto dst = reinterpret_cast(address); - - for (std::uint32_t y = 0; y < height; ++y) { - std::memcpy(dst + y * pitch * pixelSize, src + y * width * pixelSize, - width * pixelSize); - } - return; - } - - auto src = reinterpret_cast(getData()); - auto dst = reinterpret_cast(address); - - for (uint32_t y = 0; y < height; ++y) { - for (uint32_t x = 0; x < width; ++x) { - auto tiledOffset = computeTiledElementByteOffset( - tileMode, pixelSize * 8, x, y, 0, kMacroTileMode_1x2_16, 0, 0, 0, 0, - width, height, 1, pitch, 1); - - auto linearOffset = - computeLinearElementByteOffset(x, y, 0, 0, pitch, 1, pixelSize, 1); - - std::memcpy(dst + tiledOffset, src + linearOffset, pixelSize); - } - } - } - - // const DeviceMemoryRef &getMemory() const { return mMemory; } - bool operator==(std::nullptr_t) const { return mBuffer == nullptr; } - bool operator!=(std::nullptr_t) const { return mBuffer != nullptr; } -}; - -class Image2D; - -class ImageRef { - VkImage mImage = VK_NULL_HANDLE; - VkFormat mFormat = {}; - VkImageAspectFlags mAspects = {}; - VkImageLayout *mLayout = {}; - unsigned mWidth = 0; - unsigned mHeight = 0; - unsigned mDepth = 0; - -public: - ImageRef() = default; - ImageRef(Image2D &); - - static ImageRef Create(VkImage image, VkFormat format, - VkImageAspectFlags aspects, VkImageLayout *layout, - unsigned width, unsigned height, unsigned depth) { - ImageRef result; - result.mImage = image; - result.mFormat = format; - result.mAspects = aspects; - result.mLayout = layout; - result.mWidth = width; - result.mHeight = height; - result.mDepth = depth; - return result; - } - - unsigned getWidth() const { return mWidth; } - unsigned getHeight() const { return mHeight; } - unsigned getDepth() const { return mDepth; } - VkImage getHandle() const { return mImage; } - - VkMemoryRequirements getMemoryRequirements() const { - VkMemoryRequirements requirements{}; - vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements); - return requirements; - } - - VkSubresourceLayout getSubresourceLayout(VkImageAspectFlags aspectMask, - uint32_t mipLevel = 0, - uint32_t arrayLayer = 0) const { - VkImageSubresource subResource{.aspectMask = aspectMask, - .mipLevel = mipLevel, - .arrayLayer = arrayLayer}; - VkSubresourceLayout subResourceLayout; - vkGetImageSubresourceLayout(g_vkDevice, mImage, &subResource, - &subResourceLayout); - - return subResourceLayout; - } - - void readFromBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer, - VkImageAspectFlags destAspect, - VkDeviceSize bufferOffset = 0) { - transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL); - - VkBufferImageCopy region{}; - region.bufferOffset = bufferOffset; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource.aspectMask = destAspect; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset = {0, 0, 0}; - region.imageExtent = {mWidth, mHeight, 1}; - - vkCmdCopyBufferToImage(cmdBuffer, buffer, mImage, VK_IMAGE_LAYOUT_GENERAL, - 1, ®ion); - } - - void writeToBuffer(VkCommandBuffer cmdBuffer, VkBuffer buffer, - VkImageAspectFlags sourceAspect) { - transitionLayout(cmdBuffer, VK_IMAGE_LAYOUT_GENERAL); - - VkBufferImageCopy region{}; - region.bufferOffset = 0; - region.bufferRowLength = 0; - region.bufferImageHeight = 0; - region.imageSubresource.aspectMask = sourceAspect; - region.imageSubresource.mipLevel = 0; - region.imageSubresource.baseArrayLayer = 0; - region.imageSubresource.layerCount = 1; - region.imageOffset = {0, 0, 0}; - region.imageExtent = {mWidth, mHeight, 1}; - - vkCmdCopyImageToBuffer(cmdBuffer, mImage, VK_IMAGE_LAYOUT_GENERAL, buffer, - 1, ®ion); - } - - [[nodiscard]] Buffer writeToBuffer(VkCommandBuffer cmdBuffer, - MemoryResource &pool, - VkImageAspectFlags sourceAspect) { - auto transferBuffer = Buffer::Allocate( - pool, getMemoryRequirements().size, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - writeToBuffer(cmdBuffer, transferBuffer.getHandle(), sourceAspect); - return transferBuffer; - } - - [[nodiscard]] Buffer read(VkCommandBuffer cmdBuffer, MemoryResource &pool, - const void *address, TileMode tileMode, - VkImageAspectFlags destAspect, std::uint32_t bpp, - std::size_t width = 0, std::size_t height = 0, - std::size_t pitch = 0) { - if (width == 0) { - width = mWidth; - } - if (height == 0) { - height = mHeight; - } - if (pitch == 0) { - pitch = width; - } - auto memSize = getMemoryRequirements().size; - auto transferBuffer = Buffer::Allocate( - pool, memSize, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - transferBuffer.readFromImage(address, bpp, tileMode, width, height, 1, - pitch); - - readFromBuffer(cmdBuffer, transferBuffer.getHandle(), destAspect); - - return transferBuffer; - } - - void transitionLayout(VkCommandBuffer cmdBuffer, VkImageLayout newLayout) { - if (*mLayout == newLayout) { - return; - } - - VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = *mLayout; - barrier.newLayout = newLayout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = mImage; - barrier.subresourceRange.aspectMask = mAspects; - barrier.subresourceRange.baseMipLevel = 0; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = 0; - barrier.subresourceRange.layerCount = 1; - - auto layoutToStageAccess = [](VkImageLayout layout) - -> std::pair { - switch (layout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - case VK_IMAGE_LAYOUT_GENERAL: - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0}; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT}; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT}; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT}; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT}; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT}; - - default: - util::unreachable("unsupported layout transition! %d", layout); - } - }; - - auto [sourceStage, sourceAccess] = layoutToStageAccess(*mLayout); - auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout); - - barrier.srcAccessMask = sourceAccess; - barrier.dstAccessMask = destinationAccess; - - vkCmdPipelineBarrier(cmdBuffer, sourceStage, destinationStage, 0, 0, - nullptr, 0, nullptr, 1, &barrier); - - *mLayout = newLayout; - } -}; - -class Image2D { - VkImage mImage = VK_NULL_HANDLE; - VkFormat mFormat = {}; - VkImageAspectFlags mAspects = {}; - VkImageLayout mLayout = {}; - unsigned mWidth = 0; - unsigned mHeight = 0; - DeviceMemoryRef mMemory; - -public: - Image2D(const Image2D &) = delete; - - Image2D() = default; - Image2D(Image2D &&other) { *this = std::move(other); } - - ~Image2D() { - if (mImage != nullptr) { - vkDestroyImage(g_vkDevice, mImage, g_vkAllocator); - - if (mMemory.release != nullptr) { - mMemory.release(mMemory); - } - } - } - - Image2D &operator=(Image2D &&other) { - std::swap(mImage, other.mImage); - std::swap(mFormat, other.mFormat); - std::swap(mAspects, other.mAspects); - std::swap(mLayout, other.mLayout); - std::swap(mWidth, other.mWidth); - std::swap(mHeight, other.mHeight); - return *this; - } - - Image2D(uint32_t width, uint32_t height, VkFormat format, - VkImageUsageFlags usage, - VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, - VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, - VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, - uint32_t mipLevels = 1, uint32_t arrayLevels = 1, - VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) { - VkImageCreateInfo imageInfo{}; - imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - imageInfo.imageType = VK_IMAGE_TYPE_2D; - imageInfo.extent.width = width; - imageInfo.extent.height = height; - imageInfo.extent.depth = 1; - imageInfo.mipLevels = mipLevels; - imageInfo.arrayLayers = arrayLevels; - imageInfo.format = format; - imageInfo.tiling = tiling; - imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageInfo.usage = usage; - imageInfo.samples = samples; - imageInfo.sharingMode = sharingMode; - - mFormat = format; - - if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - mAspects |= VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } else { - mAspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - - mLayout = initialLayout; - mWidth = width; - mHeight = height; - - Verify() << vkCreateImage(g_vkDevice, &imageInfo, nullptr, &mImage); - } - - static Image2D - Allocate(MemoryResource &pool, uint32_t width, uint32_t height, - VkFormat format, VkImageUsageFlags usage, - VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL, - VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT, - VkSharingMode sharingMode = VK_SHARING_MODE_EXCLUSIVE, - uint32_t mipLevels = 1, uint32_t arrayLevels = 1, - VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED) { - - Image2D result(width, height, format, usage, tiling, samples, sharingMode, - mipLevels, arrayLevels, initialLayout); - - result.allocateAndBind(pool); - return result; - } - - VkImage getHandle() const { return mImage; } - [[nodiscard]] VkImage release() { return std::exchange(mImage, nullptr); } - - VkMemoryRequirements getMemoryRequirements() const { - VkMemoryRequirements requirements{}; - vkGetImageMemoryRequirements(g_vkDevice, mImage, &requirements); - return requirements; - } - - void allocateAndBind(MemoryResource &pool) { - auto memory = pool.allocate(getMemoryRequirements()); - bindMemory(memory); - } - - void bindMemory(DeviceMemoryRef memory) { - Verify() << vkBindImageMemory(g_vkDevice, mImage, memory.deviceMemory, - memory.offset); - mMemory = memory; - } - - const DeviceMemoryRef &getMemory() const { return mMemory; } - friend ImageRef; -}; - -inline ImageRef::ImageRef(Image2D &image) { - mImage = image.mImage; - mFormat = image.mFormat; - mAspects = image.mAspects; - mLayout = &image.mLayout; - mWidth = image.mWidth; - mHeight = image.mHeight; - mDepth = 1; -} -} // namespace amdgpu::device::vk diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp deleted file mode 100644 index 356d3bd..0000000 --- a/hw/amdgpu/device/src/device.cpp +++ /dev/null @@ -1,5115 +0,0 @@ -#include "device.hpp" -#include "amdgpu/bridge/bridge.hpp" -#include "amdgpu/shader/AccessOp.hpp" -#include "amdgpu/shader/Converter.hpp" -#include "gpu-scheduler.hpp" -#include "scheduler.hpp" -#include "tiler.hpp" - -#include "spirv-tools/optimizer.hpp" -#include "util/area.hpp" -#include "util/unreachable.hpp" -#include "vk.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef NDEBUG -#include -#endif - -using namespace amdgpu; -using namespace amdgpu::device; - -static const bool kUseDirectMemory = false; -static amdgpu::bridge::BridgeHeader *g_bridge; - -namespace amdgpu::device::vk { -VkDevice g_vkDevice = VK_NULL_HANDLE; -VkAllocationCallbacks *g_vkAllocator = nullptr; -std::vector> g_computeQueues; -std::vector> g_graphicsQueues; - -static VkPhysicalDeviceMemoryProperties g_physicalMemoryProperties; -static VkPhysicalDeviceProperties g_physicalDeviceProperties; -std::uint32_t findPhysicalMemoryTypeIndex(std::uint32_t typeBits, - VkMemoryPropertyFlags properties) { - typeBits &= (1 << g_physicalMemoryProperties.memoryTypeCount) - 1; - - while (typeBits != 0) { - auto typeIndex = std::countr_zero(typeBits); - - if ((g_physicalMemoryProperties.memoryTypes[typeIndex].propertyFlags & - properties) == properties) { - return typeIndex; - } - - typeBits &= ~(1 << typeIndex); - } - - util::unreachable("Failed to find memory type with properties %x", - properties); -} -} // namespace amdgpu::device::vk - -namespace amdgpu::device { -GpuScheduler &getComputeQueueScheduler() { - static GpuScheduler result{vk::g_computeQueues, "compute"}; - return result; -} -GpuScheduler &getGraphicsQueueScheduler() { - static GpuScheduler result{vk::g_graphicsQueues, "graphics"}; - return result; -} - -Scheduler &getCpuScheduler() { - static Scheduler result{4}; - return result; -} - -GpuScheduler &getGpuScheduler(ProcessQueue queue) { - // TODO: compute scheduler load factor - if ((queue & ProcessQueue::Transfer) == ProcessQueue::Transfer) { - return getComputeQueueScheduler(); - } - - if ((queue & ProcessQueue::Compute) == ProcessQueue::Compute) { - return getComputeQueueScheduler(); - } - - if ((queue & ProcessQueue::Graphics) == ProcessQueue::Graphics) { - return getGraphicsQueueScheduler(); - } - - std::abort(); -} - -} // namespace amdgpu::device - -static VkResult _vkCreateShadersEXT(VkDevice device, uint32_t createInfoCount, - const VkShaderCreateInfoEXT *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkShaderEXT *pShaders) { - static auto fn = (PFN_vkCreateShadersEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCreateShadersEXT"); - return fn(device, createInfoCount, pCreateInfos, pAllocator, pShaders); -} - -static void _vkDestroyShaderEXT(VkDevice device, VkShaderEXT shader, - const VkAllocationCallbacks *pAllocator) { - static auto fn = (PFN_vkDestroyShaderEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkDestroyShaderEXT"); - - fn(device, shader, pAllocator); -} - -static void _vkCmdBindShadersEXT(VkCommandBuffer commandBuffer, - uint32_t stageCount, - const VkShaderStageFlagBits *pStages, - const VkShaderEXT *pShaders) { - static PFN_vkCmdBindShadersEXT fn = - (PFN_vkCmdBindShadersEXT)vkGetDeviceProcAddr(vk::g_vkDevice, - "vkCmdBindShadersEXT"); - - return fn(commandBuffer, stageCount, pStages, pShaders); -} - -static void _vkCmdSetColorBlendEnableEXT(VkCommandBuffer commandBuffer, - uint32_t firstAttachment, - uint32_t attachmentCount, - const VkBool32 *pColorBlendEnables) { - static PFN_vkCmdSetColorBlendEnableEXT fn; - - if (fn == nullptr) { - fn = (PFN_vkCmdSetColorBlendEnableEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetColorBlendEnableEXT"); - } - - return fn(commandBuffer, firstAttachment, attachmentCount, - pColorBlendEnables); -} -static void _vkCmdSetColorBlendEquationEXT( - VkCommandBuffer commandBuffer, uint32_t firstAttachment, - uint32_t attachmentCount, - const VkColorBlendEquationEXT *pColorBlendEquations) { - static PFN_vkCmdSetColorBlendEquationEXT fn; - - if (fn == nullptr) { - fn = (PFN_vkCmdSetColorBlendEquationEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetColorBlendEquationEXT"); - } - - return fn(commandBuffer, firstAttachment, attachmentCount, - pColorBlendEquations); -} - -static void _vkCmdSetDepthClampEnableEXT(VkCommandBuffer commandBuffer, - VkBool32 depthClampEnable) { - static PFN_vkCmdSetDepthClampEnableEXT fn; - - if (fn == nullptr) { - fn = (PFN_vkCmdSetDepthClampEnableEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetDepthClampEnableEXT"); - } - - return fn(commandBuffer, depthClampEnable); -} - -static void _vkCmdSetLogicOpEXT(VkCommandBuffer commandBuffer, - VkLogicOp logicOp) { - static PFN_vkCmdSetLogicOpEXT fn; - - if (fn == nullptr) { - fn = (PFN_vkCmdSetLogicOpEXT)vkGetDeviceProcAddr(vk::g_vkDevice, - "vkCmdSetLogicOpEXT"); - } - - return fn(commandBuffer, logicOp); -} - -static void _vkCmdSetPolygonModeEXT(VkCommandBuffer commandBuffer, - VkPolygonMode polygonMode) { - static PFN_vkCmdSetPolygonModeEXT fn; - - if (fn == nullptr) { - fn = (PFN_vkCmdSetPolygonModeEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetPolygonModeEXT"); - } - - return fn(commandBuffer, polygonMode); -} - -static void _vkCmdSetLogicOpEnableEXT(VkCommandBuffer commandBuffer, - VkBool32 logicOpEnable) { - static PFN_vkCmdSetLogicOpEnableEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetLogicOpEnableEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetLogicOpEnableEXT"); - } - - return fn(commandBuffer, logicOpEnable); -} -static void -_vkCmdSetRasterizationSamplesEXT(VkCommandBuffer commandBuffer, - VkSampleCountFlagBits rasterizationSamples) { - static PFN_vkCmdSetRasterizationSamplesEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetRasterizationSamplesEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetRasterizationSamplesEXT"); - } - - return fn(commandBuffer, rasterizationSamples); -} -static void _vkCmdSetSampleMaskEXT(VkCommandBuffer commandBuffer, - VkSampleCountFlagBits samples, - const VkSampleMask *pSampleMask) { - static PFN_vkCmdSetSampleMaskEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetSampleMaskEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetSampleMaskEXT"); - } - - return fn(commandBuffer, samples, pSampleMask); -} -static void -_vkCmdSetTessellationDomainOriginEXT(VkCommandBuffer commandBuffer, - VkTessellationDomainOrigin domainOrigin) { - static PFN_vkCmdSetTessellationDomainOriginEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetTessellationDomainOriginEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetTessellationDomainOriginEXT"); - } - - return fn(commandBuffer, domainOrigin); -} -static void _vkCmdSetAlphaToCoverageEnableEXT(VkCommandBuffer commandBuffer, - VkBool32 alphaToCoverageEnable) { - static PFN_vkCmdSetAlphaToCoverageEnableEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetAlphaToCoverageEnableEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetAlphaToCoverageEnableEXT"); - } - - return fn(commandBuffer, alphaToCoverageEnable); -} -static void _vkCmdSetVertexInputEXT( - VkCommandBuffer commandBuffer, uint32_t vertexBindingDescriptionCount, - const VkVertexInputBindingDescription2EXT *pVertexBindingDescriptions, - uint32_t vertexAttributeDescriptionCount, - const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions) { - static PFN_vkCmdSetVertexInputEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetVertexInputEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetVertexInputEXT"); - } - - return fn(commandBuffer, vertexBindingDescriptionCount, - pVertexBindingDescriptions, vertexAttributeDescriptionCount, - pVertexAttributeDescriptions); -} -static void -_vkCmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, - uint32_t firstAttachment, uint32_t attachmentCount, - const VkColorComponentFlags *pColorWriteMasks) { - static PFN_vkCmdSetColorWriteMaskEXT fn; - if (fn == nullptr) { - fn = (PFN_vkCmdSetColorWriteMaskEXT)vkGetDeviceProcAddr( - vk::g_vkDevice, "vkCmdSetColorWriteMaskEXT"); - } - - return fn(commandBuffer, firstAttachment, attachmentCount, pColorWriteMasks); -} - -static util::MemoryAreaTable memoryAreaTable[6]; - -void device::setVkDevice(VkDevice device, - VkPhysicalDeviceMemoryProperties memProperties, - VkPhysicalDeviceProperties devProperties) { - vk::g_vkDevice = device; - vk::g_physicalMemoryProperties = memProperties; - vk::g_physicalDeviceProperties = devProperties; -} - -static VkBlendFactor blendMultiplierToVkBlendFactor(BlendMultiplier mul) { - switch (mul) { - case kBlendMultiplierZero: - return VK_BLEND_FACTOR_ZERO; - case kBlendMultiplierOne: - return VK_BLEND_FACTOR_ONE; - case kBlendMultiplierSrcColor: - return VK_BLEND_FACTOR_SRC_COLOR; - case kBlendMultiplierOneMinusSrcColor: - return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; - case kBlendMultiplierSrcAlpha: - return VK_BLEND_FACTOR_SRC_ALPHA; - case kBlendMultiplierOneMinusSrcAlpha: - return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - case kBlendMultiplierDestAlpha: - return VK_BLEND_FACTOR_DST_ALPHA; - case kBlendMultiplierOneMinusDestAlpha: - return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; - case kBlendMultiplierDestColor: - return VK_BLEND_FACTOR_DST_COLOR; - case kBlendMultiplierOneMinusDestColor: - return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; - case kBlendMultiplierSrcAlphaSaturate: - return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; - case kBlendMultiplierConstantColor: - return VK_BLEND_FACTOR_CONSTANT_COLOR; - case kBlendMultiplierOneMinusConstantColor: - return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; - case kBlendMultiplierSrc1Color: - return VK_BLEND_FACTOR_SRC1_COLOR; - case kBlendMultiplierInverseSrc1Color: - return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; - case kBlendMultiplierSrc1Alpha: - return VK_BLEND_FACTOR_SRC1_ALPHA; - case kBlendMultiplierInverseSrc1Alpha: - return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; - case kBlendMultiplierConstantAlpha: - return VK_BLEND_FACTOR_CONSTANT_ALPHA; - case kBlendMultiplierOneMinusConstantAlpha: - return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; - } - - util::unreachable(); -} - -static VkBlendOp blendFuncToVkBlendOp(BlendFunc func) { - switch (func) { - case kBlendFuncAdd: - return VK_BLEND_OP_ADD; - case kBlendFuncSubtract: - return VK_BLEND_OP_SUBTRACT; - case kBlendFuncMin: - return VK_BLEND_OP_MIN; - case kBlendFuncMax: - return VK_BLEND_OP_MAX; - case kBlendFuncReverseSubtract: - return VK_BLEND_OP_REVERSE_SUBTRACT; - } - - util::unreachable(); -} - -#define GNM_GET_FIELD(src, registername, field) \ - (((src) & (GNM_##registername##__##field##__MASK)) >> \ - (GNM_##registername##__##field##__SHIFT)) - -#define mmSQ_BUF_RSRC_WORD0 0x23C0 -#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__MASK 0xffffffffL // size:32 -#define GNM_SQ_BUF_RSRC_WORD0__BASE_ADDRESS__SHIFT 0 - -#define mmSQ_BUF_RSRC_WORD1 0x23C1 -#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__MASK 0x00000fffL // size:12 -#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__MASK 0x3fff0000L // size:14 -#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__MASK 0x80000000L // size: 1 -#define GNM_SQ_BUF_RSRC_WORD1__BASE_ADDRESS_HI__SHIFT 0 -#define GNM_SQ_BUF_RSRC_WORD1__STRIDE__SHIFT 16 -#define GNM_SQ_BUF_RSRC_WORD1__SWIZZLE_ENABLE__SHIFT 31 - -#define mmSQ_BUF_RSRC_WORD2 0x23C2 -#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__MASK 0xffffffffL // size:32 -#define GNM_SQ_BUF_RSRC_WORD2__NUM_RECORDS__SHIFT 0 - -#define mmSQ_BUF_RSRC_WORD3 0x23C3 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__MASK 0x00000007L // size: 3 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__MASK 0x00000038L // size: 3 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__MASK 0x000001c0L // size: 3 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__MASK 0x00000e00L // size: 3 -#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__MASK 0x00180000L // size: 2 -#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__MASK 0x00600000L // size: 2 -#define GNM_SQ_BUF_RSRC_WORD3__TYPE__MASK 0xc0000000L // size: 2 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_X__SHIFT 0 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Y__SHIFT 3 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_Z__SHIFT 6 -#define GNM_SQ_BUF_RSRC_WORD3__DST_SEL_W__SHIFT 9 -#define GNM_SQ_BUF_RSRC_WORD3__ELEMENT_SIZE__SHIFT 19 -#define GNM_SQ_BUF_RSRC_WORD3__INDEX_STRIDE__SHIFT 21 -#define GNM_SQ_BUF_RSRC_WORD3__TYPE__SHIFT 30 - -#define mmCB_COLOR0_PITCH 0xA319 -#define GNM_CB_COLOR0_PITCH__TILE_MAX__MASK 0x000007ffL // size:11 -#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__MASK 0x7ff00000L // size:11 -#define GNM_CB_COLOR0_PITCH__TILE_MAX__SHIFT 0 -#define GNM_CB_COLOR0_PITCH__FMASK_TILE_MAX__SHIFT 20 - -#define mmCB_COLOR0_SLICE 0xA31A -#define GNM_CB_COLOR0_SLICE__TILE_MAX__MASK 0x003fffffL // size:22 -#define GNM_CB_COLOR0_SLICE__TILE_MAX__SHIFT 0 - -#define mmCB_COLOR0_VIEW 0xA31B -#define GNM_CB_COLOR0_VIEW__SLICE_START__MASK 0x000007ffL // size:11 -#define GNM_CB_COLOR0_VIEW__SLICE_MAX__MASK 0x00ffe000L // size:11 -#define GNM_CB_COLOR0_VIEW__SLICE_START__SHIFT 0 -#define GNM_CB_COLOR0_VIEW__SLICE_MAX__SHIFT 13 - -#define mmCB_COLOR0_INFO 0xA31C -#define GNM_CB_COLOR0_INFO__FAST_CLEAR__MASK 0x00002000L // size: 1 -#define GNM_CB_COLOR0_INFO__COMPRESSION__MASK 0x00004000L // size: 1 -#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__MASK 0x00080000L // size: 1 -#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__MASK 0x0C000000L // size: 2 -#define GNM_CB_COLOR0_INFO__DCC_ENABLE__MASK 0x10000000L // size: 1 -#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__MASK 0x60000000L // size: 2 -#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__MASK 0x80000000L // size: 1 -#define GNM_CB_COLOR0_INFO__FAST_CLEAR__SHIFT 13 -#define GNM_CB_COLOR0_INFO__COMPRESSION__SHIFT 14 -#define GNM_CB_COLOR0_INFO__CMASK_IS_LINEAR__SHIFT 19 -#define GNM_CB_COLOR0_INFO__FMASK_COMPRESSION_MODE__SHIFT 26 -#define GNM_CB_COLOR0_INFO__DCC_ENABLE__SHIFT 28 -#define GNM_CB_COLOR0_INFO__CMASK_ADDR_TYPE__SHIFT 29 -#define GNM_CB_COLOR0_INFO__ALT_TILE_MODE__SHIFT 31 -#define GNM_CB_COLOR0_INFO__FORMAT__MASK 0x3f << 2 -#define GNM_CB_COLOR0_INFO__FORMAT__SHIFT 2 - -#define GNM_CB_COLOR0_INFO__ARRAY_MODE__MASK 0x0f << 8 -#define GNM_CB_COLOR0_INFO__ARRAY_MODE__SHIFT 8 - -enum { - ARRAY_LINEAR_GENERAL = 0x00, // Unaligned linear array - ARRAY_LINEAR_ALIGNED = 0x01, // Aligned linear array -}; - -#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__MASK 0x07 << 12 -#define GNM_CB_COLOR0_INFO__NUMBER_TYPE__SHIFT 12 - -enum { - NUMBER_UNORM = 0x00, // unsigned repeating fraction (urf): range [0..1], scale - // factor (2^n)-1 - NUMBER_SNORM = 0x01, // Microsoft-style signed rf: range [-1..1], scale factor - // (2^(n-1))-1 - NUMBER_USCALED = 0x02, // unsigned integer, converted to float in shader: - // range [0..(2^n)-1] - NUMBER_SSCALED = 0x03, // signed integer, converted to float in shader: range - // [-2^(n-1)..2^(n-1)-1] - NUMBER_UINT = 0x04, // zero-extended bit field, int in shader: not blendable - // or filterable - NUMBER_SINT = 0x05, // sign-extended bit field, int in shader: not blendable - // or filterable - NUMBER_SRGB = 0x06, // gamma corrected, range [0..1] (only suported for 8-bit - // components (always rounds color channels) - NUMBER_FLOAT = - 0x07, // floating point, depends on component size: 32-bit: IEEE float, - // SE8M23, bias 127, range (- 2^129..2^129) 24-bit: Depth float, - // E4M20, bias 15, range [0..1] 16-bit: Short float SE5M10, bias 15, - // range (-2^17..2^17) 11-bit: Packed float, E5M6 bias 15, range - // [0..2^17) 10-bit: Packed float, E5M5 bias 15, range [0..2^17) all - // other component sizes are treated as UINT -}; - -#define GNM_CB_COLOR0_INFO__READ_SIZE__MASK 1 << 15 -#define GNM_CB_COLOR0_INFO__READ_SIZE__SHIFT 15 - -// Specifies how to map the red, green, blue, and alpha components from the -// shader to the components in the frame buffer pixel format. There are four -// choices for each number of components. With one component, the four modes -// select any one component. With 2-4 components, SWAP_STD selects the low order -// shader components in little-endian order; SWAP_ALT selects an alternate order -// (for 4 compoents) or inclusion of alpha (for 2 or 3 components); and the -// other two reverse the component orders for use on big-endian machines. The -// following table specifies the exact component mappings: -// -// 1 comp std alt std_rev alt_rev -// ----------- ------- ------- ------- ------- -// comp 0: red green blue alpha -// -// 3 comps std alt std_rev alt_rev -// ----------- ------- ------- ------- ------- -// comp 0: red red green alpha -// comp 1: green alpha red red -// -// 3 comps std alt std_rev alt_rev -// ----------- ------- ------- ------- ------- -// comp 0: red red blue alpha -// comp 1: green green green green -// comp 2: blue alpha red red -// -// 4 comps std alt std_rev alt_rev -// ----------- ------- ------- ------- ------- -// comp 0: red blue alpha alpha -// comp 1: green green blue red -// comp 2: blue red green green -// comp 3: alpha alpha red blue -// -#define GNM_CB_COLOR0_INFO__COMP_SWAP__MASK 0x03 << 16 -#define GNM_CB_COLOR0_INFO__COMP_SWAP__SHIFT 16 -enum { - SWAP_STD = 0x00, // standard little-endian comp order - SWAP_ALT = 0x01, // alternate components or order - SWAP_STD_REV = 0x02, // reverses SWAP_STD order - SWAP_ALT_REV = 0x03, // reverses SWAP_ALT order -}; - -// Specifies whether to clamp source data to the render target range prior to -// blending, in addition to the post- blend clamp. This bit must be zero for -// uscaled, sscaled and float number types and when blend_bypass is set. -#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__MASK 1 << 20 -#define GNM_CB_COLOR0_INFO__BLEND_CLAMP__SHIFT 20 - -// If false, use RGB=0.0 and A=1.0 (0x3f800000) to expand fast-cleared tiles. If -// true, use the CB_CLEAR register values to expand fast-cleared tiles. -#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__MASK 1 << 21 -#define GNM_CB_COLOR0_INFO__CLEAR_COLOR__SHIFT 21 - -// If false, blending occurs normaly as specified in CB_BLEND#_CONTROL. If true, -// blending (but not fog) is disabled. This must be set for the 24_8 and 8_24 -// formats and when the number type is uint or sint. It should also be set for -// number types that are required to ignore the blend state in a specific -// aplication interface. -#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__MASK 1 << 22 -#define GNM_CB_COLOR0_INFO__BLEND_BYPASS__SHIFT 22 - -// If true, use 32-bit float precision for source colors, else truncate to -// 12-bit mantissa precision. This applies even if blending is disabled so that -// a null blend and blend disable produce the same result. This field is ignored -// for NUMBER_UINT and NUMBER_SINT. It must be one for floating point components -// larger than 16-bits or non- floating components larger than 12-bits, -// otherwise it must be 0. -#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__MASK 1 << 23 -#define GNM_CB_COLOR0_INFO__BLEND_FLOAT32__SHIFT 23 - -// If false, floating point processing follows full IEEE rules for INF, NaN, and -// -0. If true, 0*anything produces 0 and no operation produces -0. -#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__MASK 1 << 24 -#define GNM_CB_COLOR0_INFO__SIMPLE_FLOAT__SHIFT 24 - -// This field selects between truncating (standard for floats) and rounding -// (standard for most other cases) to convert blender results to frame buffer -// components. The ROUND_BY_HALF setting can be over-riden by the DITHER_ENABLE -// field in CB_COLOR_CONTROL. -#define GNM_CB_COLOR0_INFO__ROUND_MODE__MASK 1 << 25 -#define GNM_CB_COLOR0_INFO__ROUND_MODE__SHIFT 25 - -// This field indicates the allowed format for color data being exported from -// the pixel shader into the output merge block. This field may only be set to -// EXPORT_NORM if BLEND_CLAMP is enabled, BLEND_FLOAT32 is disabled, and the -// render target has only 11-bit or smaller UNORM or SNORM components. Selecting -// EXPORT_NORM flushes to zero values with exponent less than 0x70 (values less -// than 2^-15). -#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__MASK 1 << 27 -#define GNM_CB_COLOR0_INFO__SOURCE_FORMAT__SHIFT 27 - -#define mmCB_COLOR0_ATTRIB 0xA31D -#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__MASK 0x0000001fL // size: 5 -#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__MASK 0x000003e0L // size: 5 -#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__MASK 0x00007000L // size: 3 -#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__MASK 0x00018000L // size: 2 -#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__MASK 0x00020000L // size: 1 -#define GNM_CB_COLOR0_ATTRIB__TILE_MODE_INDEX__SHIFT 0 -#define GNM_CB_COLOR0_ATTRIB__FMASK_TILE_MODE_INDEX__SHIFT 5 -#define GNM_CB_COLOR0_ATTRIB__NUM_SAMPLES__SHIFT 12 -#define GNM_CB_COLOR0_ATTRIB__NUM_FRAGMENTS__SHIFT 15 -#define GNM_CB_COLOR0_ATTRIB__FORCE_DST_ALPHA_1__SHIFT 17 - -#define mmCB_COLOR0_DCC_CONTROL 0xA31E -#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__MASK \ - 0x00000001L // size: 1 -#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__MASK \ - 0x0000000cL // size: 2 -#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__MASK \ - 0x00000010L // size: 1 -#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__MASK \ - 0x00000060L // size: 2 -#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__MASK 0x00000180L // size: 2 -#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__MASK \ - 0x00000200L // size: 1 -#define GNM_CB_COLOR0_DCC_CONTROL__OVERWRITE_COMBINER_DISABLE__SHIFT 0 -#define GNM_CB_COLOR0_DCC_CONTROL__MAX_UNCOMPRESSED_BLOCK_SIZE__SHIFT 2 -#define GNM_CB_COLOR0_DCC_CONTROL__MIN_COMPRESSED_BLOCK_SIZE__SHIFT 4 -#define GNM_CB_COLOR0_DCC_CONTROL__MAX_COMPRESSED_BLOCK_SIZE__SHIFT 5 -#define GNM_CB_COLOR0_DCC_CONTROL__COLOR_TRANSFORM__SHIFT 7 -#define GNM_CB_COLOR0_DCC_CONTROL__INDEPENDENT_64B_BLOCKS__SHIFT 9 - -#define mmCB_COLOR0_CMASK 0xA31F -#define GNM_CB_COLOR0_CMASK__BASE_256B__MASK 0xffffffffL // size:32 -#define GNM_CB_COLOR0_CMASK__BASE_256B__SHIFT 0 - -#define mmCB_COLOR0_CMASK_SLICE 0xA320 -#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__MASK 0x00003fffL // size:14 -#define GNM_CB_COLOR0_CMASK_SLICE__TILE_MAX__SHIFT 0 - -#define mmCB_COLOR0_FMASK 0xA321 -#define GNM_CB_COLOR0_FMASK__BASE_256B__MASK 0xffffffffL // size:32 -#define GNM_CB_COLOR0_FMASK__BASE_256B__SHIFT 0 - -#define mmCB_COLOR0_FMASK_SLICE 0xA322 -#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__MASK 0x003fffffL // size:22 -#define GNM_CB_COLOR0_FMASK_SLICE__TILE_MAX__SHIFT 0 - -#define mmCB_COLOR0_CLEAR_WORD0 0xA323 -#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__MASK 0xffffffffL // size:32 -#define GNM_CB_COLOR0_CLEAR_WORD0__CLEAR_WORD0__SHIFT 0 - -#define mmCB_COLOR0_CLEAR_WORD1 0xA324 -#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__MASK 0xffffffffL // size:32 -#define GNM_CB_COLOR0_CLEAR_WORD1__CLEAR_WORD1__SHIFT 0 - -#define mmCB_COLOR0_DCC_BASE 0xA325 -#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__MASK 0xffffffffL // size:32 -#define GNM_CB_COLOR0_DCC_BASE__BASE_256B__SHIFT 0 - -static constexpr auto CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK = genMask(0, 5); -static constexpr auto CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK), 3); -static constexpr auto CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK), 5); -static constexpr auto CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK), 1); -static constexpr auto CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK) + 2, 5); -static constexpr auto CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK), 3); -static constexpr auto CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK), 5); -static constexpr auto CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK), 1); -static constexpr auto CB_BLEND0_CONTROL_BLEND_ENABLE_MASK = - genMask(getMaskEnd(CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK), 1); - -struct ColorBuffer { - std::uint64_t base; - std::uint8_t format; - std::uint8_t tileModeIndex; - - void setRegister(unsigned index, std::uint32_t value) { - switch (index) { - case CB_COLOR0_BASE - CB_COLOR0_BASE: - base = static_cast(value) << 8; - // std::printf(" * base = %lx\n", base); - break; - - case CB_COLOR0_PITCH - CB_COLOR0_BASE: { - auto pitchTileMax = GNM_GET_FIELD(value, CB_COLOR0_PITCH, TILE_MAX); - auto pitchFmaskTileMax = - GNM_GET_FIELD(value, CB_COLOR0_PITCH, FMASK_TILE_MAX); - // std::printf(" * TILE_MAX = %lx\n", pitchTileMax); - // std::printf(" * FMASK_TILE_MAX = %lx\n", pitchFmaskTileMax); - break; - } - case CB_COLOR0_SLICE - CB_COLOR0_BASE: { // SLICE - auto sliceTileMax = GNM_GET_FIELD(value, CB_COLOR0_SLICE, TILE_MAX); - // std::printf(" * TILE_MAX = %lx\n", sliceTileMax); - break; - } - case CB_COLOR0_VIEW - CB_COLOR0_BASE: { // VIEW - auto viewSliceStart = GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_START); - auto viewSliceMax = GNM_GET_FIELD(value, CB_COLOR0_VIEW, SLICE_MAX); - - // std::printf(" * SLICE_START = %lx\n", viewSliceStart); - // std::printf(" * SLICE_MAX = %lx\n", viewSliceMax); - break; - } - case CB_COLOR0_INFO - CB_COLOR0_BASE: { // INFO - auto fastClear = GNM_GET_FIELD(value, CB_COLOR0_INFO, FAST_CLEAR); - auto compression = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMPRESSION); - auto cmaskIsLinear = - GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_IS_LINEAR); - auto fmaskCompressionMode = - GNM_GET_FIELD(value, CB_COLOR0_INFO, FMASK_COMPRESSION_MODE); - auto dccEnable = GNM_GET_FIELD(value, CB_COLOR0_INFO, DCC_ENABLE); - auto cmaskAddrType = - GNM_GET_FIELD(value, CB_COLOR0_INFO, CMASK_ADDR_TYPE); - auto altTileMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ALT_TILE_MODE); - format = GNM_GET_FIELD(value, CB_COLOR0_INFO, FORMAT); - auto arrayMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ARRAY_MODE); - auto numberType = GNM_GET_FIELD(value, CB_COLOR0_INFO, NUMBER_TYPE); - auto readSize = GNM_GET_FIELD(value, CB_COLOR0_INFO, READ_SIZE); - auto compSwap = GNM_GET_FIELD(value, CB_COLOR0_INFO, COMP_SWAP); - auto blendClamp = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_CLAMP); - auto clearColor = GNM_GET_FIELD(value, CB_COLOR0_INFO, CLEAR_COLOR); - auto blendBypass = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_BYPASS); - auto blendFloat32 = GNM_GET_FIELD(value, CB_COLOR0_INFO, BLEND_FLOAT32); - auto simpleFloat = GNM_GET_FIELD(value, CB_COLOR0_INFO, SIMPLE_FLOAT); - auto roundMode = GNM_GET_FIELD(value, CB_COLOR0_INFO, ROUND_MODE); - auto sourceFormat = GNM_GET_FIELD(value, CB_COLOR0_INFO, SOURCE_FORMAT); - - // std::printf(" * FAST_CLEAR = %lu\n", fastClear); - // std::printf(" * COMPRESSION = %lu\n", compression); - // std::printf(" * CMASK_IS_LINEAR = %lu\n", cmaskIsLinear); - // std::printf(" * FMASK_COMPRESSION_MODE = %lu\n", - // fmaskCompressionMode); std::printf(" * DCC_ENABLE = %lu\n", - // dccEnable); std::printf(" * CMASK_ADDR_TYPE = %lu\n", cmaskAddrType); - // std::printf(" * ALT_TILE_MODE = %lu\n", altTileMode); - // std::printf(" * FORMAT = %x\n", format); - // std::printf(" * ARRAY_MODE = %u\n", arrayMode); - // std::printf(" * NUMBER_TYPE = %u\n", numberType); - // std::printf(" * READ_SIZE = %u\n", readSize); - // std::printf(" * COMP_SWAP = %u\n", compSwap); - // std::printf(" * BLEND_CLAMP = %u\n", blendClamp); - // std::printf(" * CLEAR_COLOR = %u\n", clearColor); - // std::printf(" * BLEND_BYPASS = %u\n", blendBypass); - // std::printf(" * BLEND_FLOAT32 = %u\n", blendFloat32); - // std::printf(" * SIMPLE_FLOAT = %u\n", simpleFloat); - // std::printf(" * ROUND_MODE = %u\n", roundMode); - // std::printf(" * SOURCE_FORMAT = %u\n", sourceFormat); - break; - } - - case CB_COLOR0_ATTRIB - CB_COLOR0_BASE: { // ATTRIB - tileModeIndex = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, TILE_MODE_INDEX); - auto fmaskTileModeIndex = - GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FMASK_TILE_MODE_INDEX); - auto numSamples = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_SAMPLES); - auto numFragments = GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, NUM_FRAGMENTS); - auto forceDstAlpha1 = - GNM_GET_FIELD(value, CB_COLOR0_ATTRIB, FORCE_DST_ALPHA_1); - - // std::printf(" * TILE_MODE_INDEX = %u\n", tileModeIndex); - // std::printf(" * FMASK_TILE_MODE_INDEX = %lu\n", fmaskTileModeIndex); - // std::printf(" * NUM_SAMPLES = %lu\n", numSamples); - // std::printf(" * NUM_FRAGMENTS = %lu\n", numFragments); - // std::printf(" * FORCE_DST_ALPHA_1 = %lu\n", forceDstAlpha1); - break; - } - case CB_COLOR0_CMASK - CB_COLOR0_BASE: { // CMASK - auto cmaskBase = GNM_GET_FIELD(value, CB_COLOR0_CMASK, BASE_256B) << 8; - // std::printf(" * cmaskBase = %lx\n", cmaskBase); - break; - } - case CB_COLOR0_CMASK_SLICE - CB_COLOR0_BASE: { // CMASK_SLICE - auto cmaskSliceTileMax = - GNM_GET_FIELD(value, CB_COLOR0_CMASK_SLICE, TILE_MAX); - // std::printf(" * cmaskSliceTileMax = %lx\n", cmaskSliceTileMax); - break; - } - case CB_COLOR0_FMASK - CB_COLOR0_BASE: { // FMASK - auto fmaskBase = GNM_GET_FIELD(value, CB_COLOR0_FMASK, BASE_256B) << 8; - // std::printf(" * fmaskBase = %lx\n", fmaskBase); - break; - } - case CB_COLOR0_FMASK_SLICE - CB_COLOR0_BASE: { // FMASK_SLICE - auto fmaskSliceTileMax = - GNM_GET_FIELD(value, CB_COLOR0_FMASK_SLICE, TILE_MAX); - // std::printf(" * fmaskSliceTileMax = %lx\n", fmaskSliceTileMax); - break; - } - case CB_COLOR0_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD0 - break; - case CB_COLOR1_CLEAR_WORD0 - CB_COLOR0_BASE: // CLEAR_WORD1 - break; - } - } -}; - -static constexpr std::size_t colorBuffersCount = 6; - -enum class CbRasterOp { - Blackness = 0x00, - Nor = 0x05, // ~(src | dst) - AndInverted = 0x0a, // ~src & dst - CopyInverted = 0x0f, // ~src - NotSrcErase = 0x11, // ~src & ~dst - SrcErase = 0x44, // src & ~dst - DstInvert = 0x55, // ~dst - Xor = 0x5a, // src ^ dst - Nand = 0x5f, // ~(src & dst) - And = 0x88, // src & dst - Equiv = 0x99, // ~(src ^ dst) - Noop = 0xaa, // dst - OrInverted = 0xaf, // ~src | dst - Copy = 0xcc, // src - OrReverse = 0xdd, // src | ~dst - Or = 0xEE, // src | dst - Whiteness = 0xff, -}; - -enum class CbColorFormat { - /* - 00 - CB_DISABLE: Disables drawing to color - buffer. Causes DB to not send tiles/quads to CB. CB - itself ignores this field. - 01 - CB_NORMAL: Normal rendering mode. DB - should send tiles and quads for pixel exports or just - quads for compute exports. - 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast - cleared color surface locations with clear color. DB - should send only tiles. - 03 - CB_RESOLVE: Read from MRT0, average all - samples, and write to MRT1, which is one-sample. DB - should send only tiles. - 04 - CB_DECOMPRESS: Decompress MRT0 to a - */ - Disable, - Normal, - EliminateFastClear, - Resolve, -}; - -struct QueueRegisters { - std::uint64_t pgmPsAddress = 0; - std::uint64_t pgmVsAddress = 0; - std::uint64_t pgmComputeAddress = 0; - std::uint32_t userVsData[16]; - std::uint32_t userPsData[16]; - std::uint32_t userComputeData[16]; - std::uint32_t computeNumThreadX = 1; - std::uint32_t computeNumThreadY = 1; - std::uint32_t computeNumThreadZ = 1; - std::uint8_t psUserSpgrs; - std::uint8_t vsUserSpgrs; - std::uint8_t computeUserSpgrs; - - ColorBuffer colorBuffers[colorBuffersCount]; - - std::uint32_t indexType; - std::uint64_t indexBase; - - std::uint32_t screenScissorX = 0; - std::uint32_t screenScissorY = 0; - std::uint32_t screenScissorW = 0; - std::uint32_t screenScissorH = 0; - - CbColorFormat cbColorFormat = CbColorFormat::Normal; - - CbRasterOp cbRasterOp = CbRasterOp::Copy; - - std::uint32_t vgtPrimitiveType = 0; - bool stencilEnable = false; - bool depthEnable = false; - bool depthWriteEnable = false; - bool depthBoundsEnable = false; - int zFunc = 0; - bool backFaceEnable = false; - int stencilFunc = 0; - int stencilFuncBackFace = 0; - - float depthClear = 1.f; - - bool cullFront = false; - bool cullBack = false; - int face = 0; // 0 - CCW, 1 - CW - bool polyMode = false; - int polyModeFrontPType = 0; - int polyModeBackPType = 0; - bool polyOffsetFrontEnable = false; - bool polyOffsetBackEnable = false; - bool polyOffsetParaEnable = false; - bool vtxWindowOffsetEnable = false; - bool provokingVtxLast = false; - bool erspCorrDis = false; - bool multiPrimIbEna = false; - - bool depthClearEnable = false; - bool stencilClearEnable = false; - bool depthCopy = false; - bool stencilCopy = false; - bool resummarizeEnable = false; - bool stencilCompressDisable = false; - bool depthCompressDisable = false; - bool copyCentroid = false; - int copySample = 0; - bool zpassIncrementDisable = false; - - std::uint64_t zReadBase = 0; - std::uint64_t zWriteBase = 0; - - BlendMultiplier blendColorSrc = {}; - BlendFunc blendColorFn = {}; - BlendMultiplier blendColorDst = {}; - BlendMultiplier blendAlphaSrc = {}; - BlendFunc blendAlphaFn = {}; - BlendMultiplier blendAlphaDst = {}; - bool blendSeparateAlpha = false; - bool blendEnable = false; - std::uint32_t cbRenderTargetMask = 0; - - void setRegister(std::uint32_t regId, std::uint32_t value) { - switch (regId) { - case SPI_SHADER_PGM_LO_PS: - pgmPsAddress &= ~((1ull << 40) - 1); - pgmPsAddress |= static_cast(value) << 8; - break; - case SPI_SHADER_PGM_HI_PS: - pgmPsAddress &= (1ull << 40) - 1; - pgmPsAddress |= static_cast(value) << 40; - break; - case SPI_SHADER_PGM_LO_VS: - pgmVsAddress &= ~((1ull << 40) - 1); - pgmVsAddress |= static_cast(value) << 8; - break; - case SPI_SHADER_PGM_HI_VS: - pgmVsAddress &= (1ull << 40) - 1; - pgmVsAddress |= static_cast(value) << 40; - break; - - case SPI_SHADER_USER_DATA_VS_0: - case SPI_SHADER_USER_DATA_VS_1: - case SPI_SHADER_USER_DATA_VS_2: - case SPI_SHADER_USER_DATA_VS_3: - case SPI_SHADER_USER_DATA_VS_4: - case SPI_SHADER_USER_DATA_VS_5: - case SPI_SHADER_USER_DATA_VS_6: - case SPI_SHADER_USER_DATA_VS_7: - case SPI_SHADER_USER_DATA_VS_8: - case SPI_SHADER_USER_DATA_VS_9: - case SPI_SHADER_USER_DATA_VS_10: - case SPI_SHADER_USER_DATA_VS_11: - case SPI_SHADER_USER_DATA_VS_12: - case SPI_SHADER_USER_DATA_VS_13: - case SPI_SHADER_USER_DATA_VS_14: - case SPI_SHADER_USER_DATA_VS_15: - userVsData[regId - SPI_SHADER_USER_DATA_VS_0] = value; - break; - - case SPI_SHADER_USER_DATA_PS_0: - case SPI_SHADER_USER_DATA_PS_1: - case SPI_SHADER_USER_DATA_PS_2: - case SPI_SHADER_USER_DATA_PS_3: - case SPI_SHADER_USER_DATA_PS_4: - case SPI_SHADER_USER_DATA_PS_5: - case SPI_SHADER_USER_DATA_PS_6: - case SPI_SHADER_USER_DATA_PS_7: - case SPI_SHADER_USER_DATA_PS_8: - case SPI_SHADER_USER_DATA_PS_9: - case SPI_SHADER_USER_DATA_PS_10: - case SPI_SHADER_USER_DATA_PS_11: - case SPI_SHADER_USER_DATA_PS_12: - case SPI_SHADER_USER_DATA_PS_13: - case SPI_SHADER_USER_DATA_PS_14: - case SPI_SHADER_USER_DATA_PS_15: - userPsData[regId - SPI_SHADER_USER_DATA_PS_0] = value; - break; - - case SPI_SHADER_PGM_RSRC2_PS: - psUserSpgrs = (value >> 1) & 0x1f; - break; - - case SPI_SHADER_PGM_RSRC2_VS: - vsUserSpgrs = (value >> 1) & 0x1f; - break; - - case CB_COLOR0_BASE ... CB_COLOR6_DCC_BASE: { - auto buffer = - (regId - CB_COLOR0_BASE) / (CB_COLOR1_BASE - CB_COLOR0_BASE); - auto index = (regId - CB_COLOR0_BASE) % (CB_COLOR1_BASE - CB_COLOR0_BASE); - colorBuffers[buffer].setRegister(index, value); - break; - } - - case DB_RENDER_CONTROL: - depthClearEnable = getBit(value, 0); - stencilClearEnable = getBit(value, 1); - depthCopy = getBit(value, 2); - stencilCopy = getBit(value, 3); - resummarizeEnable = getBit(value, 4); - stencilCompressDisable = getBit(value, 5); - depthCompressDisable = getBit(value, 6); - copyCentroid = getBit(value, 7); - copySample = getBits(value, 10, 8); - zpassIncrementDisable = getBit(value, 11); - break; - - case DB_Z_READ_BASE: - zReadBase = static_cast(value) << 8; - break; - - case DB_Z_WRITE_BASE: - zWriteBase = static_cast(value) << 8; - break; - - case DB_DEPTH_CLEAR: - depthClear = std::bit_cast(value); - break; - - case DB_DEPTH_CONTROL: - stencilEnable = getBit(value, 0) != 0; - depthEnable = getBit(value, 1) != 0; - depthWriteEnable = getBit(value, 2) != 0; - depthBoundsEnable = getBit(value, 3) != 0; - zFunc = getBits(value, 6, 4); - backFaceEnable = getBit(value, 7); - stencilFunc = getBits(value, 11, 8); - stencilFuncBackFace = getBits(value, 23, 20); - - // std::printf("stencilEnable=%u, depthEnable=%u, depthWriteEnable=%u, " - // "depthBoundsEnable=%u, zFunc=%u, backFaceEnable=%u, " - // "stencilFunc=%u, stencilFuncBackFace=%u\n", - // stencilEnable, depthEnable, depthWriteEnable, - // depthBoundsEnable, zFunc, backFaceEnable, stencilFunc, - // stencilFuncBackFace); - break; - - case CB_TARGET_MASK: { - cbRenderTargetMask = value; - break; - } - - case CB_COLOR_CONTROL: { - /* - If true, then each UNORM format COLOR_8_8_8_8 - MRT is treated as an SRGB format instead. This affects - both normal draw and resolve. This bit exists for - compatibility with older architectures that did not have - an SRGB number type. - */ - auto degammaEnable = getBits(value, 3, 0); - - /* - This field selects standard color processing or one of - several major operation modes. - - POSSIBLE VALUES: - 00 - CB_DISABLE: Disables drawing to color - buffer. Causes DB to not send tiles/quads to CB. CB - itself ignores this field. - 01 - CB_NORMAL: Normal rendering mode. DB - should send tiles and quads for pixel exports or just - quads for compute exports. - 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast - cleared color surface locations with clear color. DB - should send only tiles. - 03 - CB_RESOLVE: Read from MRT0, average all - samples, and write to MRT1, which is one-sample. DB - should send only tiles. - 04 - CB_DECOMPRESS: Decompress MRT0 to a - uncompressed color format. This is required before a - multisampled surface is accessed by the CPU, or used as - a texture. This also decompresses the FMASK buffer. A - CB_ELIMINATE_FAST_CLEAR pass before this is - unnecessary. DB should send tiles and quads. - 05 - CB_FMASK_DECOMPRESS: Decompress the - FMASK buffer into a texture readable format. A - CB_ELIMINATE_FAST_CLEAR pass before this is - unnecessary. DB should send only tiles. - */ - auto mode = getBits(value, 6, 4); - - /* - This field supports the 28 boolean ops that combine - either source and dest or brush and dest, with brush - provided by the shader in place of source. The code - 0xCC (11001100) copies the source to the destination, - which disables the ROP function. ROP must be disabled - if any MRT enables blending. - - POSSIBLE VALUES: - 00 - 0x00: BLACKNESS - 05 - 0x05 - 10 - 0x0A - 15 - 0x0F - 17 - 0x11: NOTSRCERASE - 34 - 0x22 - 51 - 0x33: NOTSRCCOPY - 68 - 0x44: SRCERASE - 80 - 0x50 - 85 - 0x55: DSTINVERT - 90 - 0x5A: PATINVERT - 95 - 0x5F - 102 - 0x66: SRCINVERT - 119 - 0x77 - 136 - 0x88: SRCAND - 153 - 0x99 - 160 - 0xA0 - 165 - 0xA5 - 170 - 0xAA - 175 - 0xAF - 187 - 0xBB: MERGEPAINT - 204 - 0xCC: SRCCOPY - 221 - 0xDD - 238 - 0xEE: SRCPAINT - 240 - 0xF0: PATCOPY - 245 - 0xF5 - 250 - 0xFA - 255 - 0xFF: WHITENESS - */ - auto rop3 = getBits(value, 23, 16); - - // std::printf(" * degammaEnable = %x\n", degammaEnable); - // std::printf(" * mode = %x\n", mode); - // std::printf(" * rop3 = %x\n", rop3); - - cbColorFormat = static_cast(mode); - cbRasterOp = static_cast(rop3); - break; - } - - case PA_CL_CLIP_CNTL: - cullFront = getBit(value, 0); - cullBack = getBit(value, 1); - face = getBit(value, 2); - polyMode = getBits(value, 4, 3); - polyModeFrontPType = getBits(value, 7, 5); - polyModeBackPType = getBits(value, 10, 8); - polyOffsetFrontEnable = getBit(value, 11); - polyOffsetBackEnable = getBit(value, 12); - polyOffsetParaEnable = getBit(value, 13); - vtxWindowOffsetEnable = getBit(value, 16); - provokingVtxLast = getBit(value, 19); - erspCorrDis = getBit(value, 20); - multiPrimIbEna = getBit(value, 21); - break; - - case PA_SC_SCREEN_SCISSOR_TL: - screenScissorX = static_cast(value); - screenScissorY = static_cast(value >> 16); - break; - - case PA_SC_SCREEN_SCISSOR_BR: - screenScissorW = static_cast(value) - screenScissorX; - screenScissorH = static_cast(value >> 16) - screenScissorY; - break; - - case VGT_PRIMITIVE_TYPE: - vgtPrimitiveType = value; - break; - - case COMPUTE_NUM_THREAD_X: - computeNumThreadX = value; - break; - - case COMPUTE_NUM_THREAD_Y: - computeNumThreadY = value; - break; - - case COMPUTE_NUM_THREAD_Z: - computeNumThreadZ = value; - break; - - case COMPUTE_PGM_LO: - pgmComputeAddress &= ~((1ull << 40) - 1); - pgmComputeAddress |= static_cast(value) << 8; - break; - - case COMPUTE_PGM_HI: - pgmComputeAddress &= (1ull << 40) - 1; - pgmComputeAddress |= static_cast(value) << 40; - break; - - case COMPUTE_PGM_RSRC1: - break; - case COMPUTE_PGM_RSRC2: - computeUserSpgrs = (value >> 1) & 0x1f; - break; - - case COMPUTE_USER_DATA_0: - case COMPUTE_USER_DATA_1: - case COMPUTE_USER_DATA_2: - case COMPUTE_USER_DATA_3: - case COMPUTE_USER_DATA_4: - case COMPUTE_USER_DATA_5: - case COMPUTE_USER_DATA_6: - case COMPUTE_USER_DATA_7: - case COMPUTE_USER_DATA_8: - case COMPUTE_USER_DATA_9: - case COMPUTE_USER_DATA_10: - case COMPUTE_USER_DATA_11: - case COMPUTE_USER_DATA_12: - case COMPUTE_USER_DATA_13: - case COMPUTE_USER_DATA_14: - case COMPUTE_USER_DATA_15: - userComputeData[regId - COMPUTE_USER_DATA_0] = value; - break; - - case CB_BLEND0_CONTROL: { - blendColorSrc = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK); - blendColorFn = (BlendFunc)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK); - blendColorDst = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK); - auto opacity_weight = - fetchMaskedValue(value, CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK); - blendAlphaSrc = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK); - blendAlphaFn = (BlendFunc)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK); - blendAlphaDst = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK); - blendSeparateAlpha = - fetchMaskedValue(value, - CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK) != 0; - blendEnable = - fetchMaskedValue(value, CB_BLEND0_CONTROL_BLEND_ENABLE_MASK) != 0; - - // std::printf(" * COLOR_SRCBLEND = %x\n", blendColorSrc); - // std::printf(" * COLOR_COMB_FCN = %x\n", blendColorFn); - // std::printf(" * COLOR_DESTBLEND = %x\n", blendColorDst); - // std::printf(" * OPACITY_WEIGHT = %x\n", opacity_weight); - // std::printf(" * ALPHA_SRCBLEND = %x\n", blendAlphaSrc); - // std::printf(" * ALPHA_COMB_FCN = %x\n", blendAlphaFn); - // std::printf(" * ALPHA_DESTBLEND = %x\n", blendAlphaDst); - // std::printf(" * SEPARATE_ALPHA_BLEND = %x\n", blendSeparateAlpha); - // std::printf(" * BLEND_ENABLE = %x\n", blendEnable); - break; - } - } - } -}; - -static void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image, - VkImageAspectFlags aspectFlags, - VkImageLayout oldLayout, - VkImageLayout newLayout) { - VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = oldLayout; - barrier.newLayout = newLayout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.image = image; - barrier.subresourceRange.aspectMask = aspectFlags; - barrier.subresourceRange.baseMipLevel = 0; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = 0; - barrier.subresourceRange.layerCount = 1; - - auto layoutToStageAccess = [](VkImageLayout layout) - -> std::pair { - switch (layout) { - case VK_IMAGE_LAYOUT_UNDEFINED: - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0}; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT}; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return {VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT}; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return {VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT}; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT}; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT}; - - default: - util::unreachable("unsupported layout transition! %d", layout); - } - }; - - auto [sourceStage, sourceAccess] = layoutToStageAccess(oldLayout); - auto [destinationStage, destinationAccess] = layoutToStageAccess(newLayout); - - barrier.srcAccessMask = sourceAccess; - barrier.dstAccessMask = destinationAccess; - - vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0, - nullptr, 0, nullptr, 1, &barrier); -} - -static int getBitWidthOfSurfaceFormat(SurfaceFormat format) { - switch (format) { - case kSurfaceFormatInvalid: - return 0; - case kSurfaceFormat8: - return 8; - case kSurfaceFormat16: - return 16; - case kSurfaceFormat8_8: - return 8 + 8; - case kSurfaceFormat32: - return 32; - case kSurfaceFormat16_16: - return 16 + 16; - case kSurfaceFormat10_11_11: - return 10 + 11 + 11; - case kSurfaceFormat11_11_10: - return 11 + 11 + 10; - case kSurfaceFormat10_10_10_2: - return 10 + 10 + 10 + 2; - case kSurfaceFormat2_10_10_10: - return 2 + 10 + 10 + 10; - case kSurfaceFormat8_8_8_8: - return 8 + 8 + 8 + 8; - case kSurfaceFormat32_32: - return 32 + 32; - case kSurfaceFormat16_16_16_16: - return 16 + 16 + 16 + 16; - case kSurfaceFormat32_32_32: - return 32 + 32 + 32; - case kSurfaceFormat32_32_32_32: - return 32 + 32 + 32 + 32; - case kSurfaceFormat5_6_5: - return 5 + 6 + 5; - case kSurfaceFormat1_5_5_5: - return 1 + 5 + 5 + 5; - case kSurfaceFormat5_5_5_1: - return 5 + 5 + 5 + 1; - case kSurfaceFormat4_4_4_4: - return 4 + 4 + 4 + 4; - case kSurfaceFormat8_24: - return 8 + 24; - case kSurfaceFormat24_8: - return 24 + 8; - case kSurfaceFormatX24_8_32: - return 24 + 8 + 32; - case kSurfaceFormatGB_GR: - return 2 + 2; - case kSurfaceFormatBG_RG: - return 0; - case kSurfaceFormat5_9_9_9: - return 5 + 9 + 9 + 9; - case kSurfaceFormatBc1: - return 8; - case kSurfaceFormatBc2: - return 8; - case kSurfaceFormatBc3: - return 8; - case kSurfaceFormatBc4: - return 8; - case kSurfaceFormatBc5: - return 8; - case kSurfaceFormatBc6: - return 8; - case kSurfaceFormatBc7: - return 8; - case kSurfaceFormatFmask8_S2_F1: - return 0; - case kSurfaceFormatFmask8_S4_F1: - return 0; - case kSurfaceFormatFmask8_S8_F1: - return 0; - case kSurfaceFormatFmask8_S2_F2: - return 0; - case kSurfaceFormatFmask8_S4_F2: - return 0; - case kSurfaceFormatFmask8_S4_F4: - return 0; - case kSurfaceFormatFmask16_S16_F1: - return 0; - case kSurfaceFormatFmask16_S8_F2: - return 0; - case kSurfaceFormatFmask32_S16_F2: - return 0; - case kSurfaceFormatFmask32_S8_F4: - return 0; - case kSurfaceFormatFmask32_S8_F8: - return 0; - case kSurfaceFormatFmask64_S16_F4: - return 0; - case kSurfaceFormatFmask64_S16_F8: - return 0; - case kSurfaceFormat4_4: - return 4 + 4; - case kSurfaceFormat6_5_5: - return 6 + 5 + 5; - case kSurfaceFormat1: - return 1; - case kSurfaceFormat1Reversed: - return 0; - } - - return 0; -} - -static VkFormat surfaceFormatToVkFormat(SurfaceFormat surface, - TextureChannelType channel) { - switch (surface) { - case kSurfaceFormat4_4_4_4: { - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R4G4B4A4_UNORM_PACK16; - default: - break; - } - - break; - } - - case kSurfaceFormat8: { - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R8_UNORM; - case kTextureChannelTypeSNorm: - return VK_FORMAT_R8_SNORM; - case kTextureChannelTypeUInt: - return VK_FORMAT_R8_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R8_SINT; - case kTextureChannelTypeSrgb: - return VK_FORMAT_R8_SRGB; - default: - break; - } - - break; - } - case kSurfaceFormat32: - switch (channel) { - case kTextureChannelTypeUInt: - return VK_FORMAT_R32_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R32_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R32_SFLOAT; - case kTextureChannelTypeSrgb: - return VK_FORMAT_R32_UINT; // FIXME - default: - break; - } - break; - - case kSurfaceFormat8_8: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R8G8_UNORM; - case kTextureChannelTypeSNorm: - return VK_FORMAT_R8G8_SNORM; - case kTextureChannelTypeUInt: - return VK_FORMAT_R8G8_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R8G8_SINT; - default: - break; - } - break; - - case kSurfaceFormat5_9_9_9: - switch (channel) { - case kTextureChannelTypeFloat: - return VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; - default: - break; - } - break; - - case kSurfaceFormat5_6_5: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R5G6B5_UNORM_PACK16; - - default: - break; - } - break; - - case kSurfaceFormat16_16: - switch (channel) { - case kTextureChannelTypeUInt: - return VK_FORMAT_R16G16_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R16G16_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R16G16_SFLOAT; - default: - break; - } - break; - - case kSurfaceFormat32_32: - switch (channel) { - case kTextureChannelTypeUInt: - return VK_FORMAT_R32G32_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R32G32_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R32G32_SFLOAT; - default: - break; - } - break; - - case kSurfaceFormat16_16_16_16: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R16G16B16A16_UNORM; - case kTextureChannelTypeSNorm: - return VK_FORMAT_R16G16B16A16_SNORM; - case kTextureChannelTypeUScaled: - return VK_FORMAT_R16G16B16A16_USCALED; - case kTextureChannelTypeSScaled: - return VK_FORMAT_R16G16B16A16_SSCALED; - case kTextureChannelTypeUInt: - return VK_FORMAT_R16G16B16A16_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R16G16B16A16_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case kTextureChannelTypeSrgb: - return VK_FORMAT_R16G16B16A16_UNORM; // FIXME: wrong - - default: - break; - } - break; - - case kSurfaceFormat32_32_32: - switch (channel) { - case kTextureChannelTypeUInt: - return VK_FORMAT_R32G32B32_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R32G32B32_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R32G32B32_SFLOAT; - default: - break; - } - break; - case kSurfaceFormat32_32_32_32: - switch (channel) { - case kTextureChannelTypeUInt: - return VK_FORMAT_R32G32B32A32_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R32G32B32A32_SINT; - case kTextureChannelTypeFloat: - return VK_FORMAT_R32G32B32A32_SFLOAT; - default: - break; - } - break; - - case kSurfaceFormat24_8: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_D32_SFLOAT_S8_UINT; // HACK for amdgpu - - default: - break; - } - - break; - - case kSurfaceFormat8_8_8_8: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_R8G8B8A8_UNORM; - case kTextureChannelTypeSNorm: - return VK_FORMAT_R8G8B8A8_SNORM; - case kTextureChannelTypeUScaled: - return VK_FORMAT_R8G8B8A8_USCALED; - case kTextureChannelTypeSScaled: - return VK_FORMAT_R8G8B8A8_SSCALED; - case kTextureChannelTypeUInt: - return VK_FORMAT_R8G8B8A8_UINT; - case kTextureChannelTypeSInt: - return VK_FORMAT_R8G8B8A8_SINT; - // case kTextureChannelTypeSNormNoZero: - // return VK_FORMAT_R8G8B8A8_SNORM; - case kTextureChannelTypeSrgb: - return VK_FORMAT_R8G8B8A8_SRGB; - // case kTextureChannelTypeUBNorm: - // return VK_FORMAT_R8G8B8A8_UNORM; - // case kTextureChannelTypeUBNormNoZero: - // return VK_FORMAT_R8G8B8A8_UNORM; - // case kTextureChannelTypeUBInt: - // return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; - // case kTextureChannelTypeUBScaled: - // return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; - - default: - break; - } - break; - - case kSurfaceFormatBc1: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - case kTextureChannelTypeSrgb: - return VK_FORMAT_BC1_RGBA_SRGB_BLOCK; - default: - break; - } - break; - - case kSurfaceFormatBc2: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC2_UNORM_BLOCK; - case kTextureChannelTypeSrgb: - return VK_FORMAT_BC2_SRGB_BLOCK; - default: - break; - } - break; - - case kSurfaceFormatBc3: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC3_UNORM_BLOCK; - case kTextureChannelTypeSrgb: - return VK_FORMAT_BC3_SRGB_BLOCK; - default: - break; - } - break; - - case kSurfaceFormatBc4: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC4_UNORM_BLOCK; - - case kTextureChannelTypeSNorm: - return VK_FORMAT_BC4_SNORM_BLOCK; - - default: - break; - } - break; - case kSurfaceFormatBc5: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC5_UNORM_BLOCK; - - case kTextureChannelTypeSNorm: - return VK_FORMAT_BC5_SNORM_BLOCK; - - default: - break; - } - break; - - case kSurfaceFormatBc6: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC6H_UFLOAT_BLOCK; - - case kTextureChannelTypeSNorm: - return VK_FORMAT_BC6H_SFLOAT_BLOCK; - - default: - break; - } - break; - - case kSurfaceFormatBc7: - switch (channel) { - case kTextureChannelTypeUNorm: - return VK_FORMAT_BC7_UNORM_BLOCK; - - case kTextureChannelTypeSrgb: - return VK_FORMAT_BC7_SRGB_BLOCK; - - default: - break; - } - break; - - default: - break; - } - - util::unreachable("unimplemented surface format. %x.%x\n", (int)surface, - (int)channel); -} - -static VkPrimitiveTopology getVkPrimitiveType(PrimitiveType type) { - switch (type) { - case kPrimitiveTypePointList: - return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - case kPrimitiveTypeLineList: - return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; - case kPrimitiveTypeLineStrip: - return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; - case kPrimitiveTypeTriList: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - case kPrimitiveTypeTriFan: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; - case kPrimitiveTypeTriStrip: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; - case kPrimitiveTypePatch: - return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; - case kPrimitiveTypeLineListAdjacency: - return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; - case kPrimitiveTypeLineStripAdjacency: - return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; - case kPrimitiveTypeTriListAdjacency: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY; - case kPrimitiveTypeTriStripAdjacency: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; - case kPrimitiveTypeLineLoop: - return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME - - case kPrimitiveTypeRectList: - case kPrimitiveTypeQuadList: - case kPrimitiveTypeQuadStrip: - case kPrimitiveTypePolygon: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - - default: - util::unreachable(); - } -} - -static std::pair -quadListPrimConverter(std::uint64_t index) { - static constexpr int indecies[] = {0, 1, 2, 2, 3, 0}; - return {index, index / 6 + indecies[index % 6]}; -} - -static std::pair -quadStripPrimConverter(std::uint64_t index) { - static constexpr int indecies[] = {0, 1, 3, 0, 3, 2}; - return {index, (index / 6) * 4 + indecies[index % 6]}; -} - -using ConverterFn = - std::pair(std::uint64_t index); - -static ConverterFn *getPrimConverterFn(PrimitiveType primType, - std::uint32_t *count) { - switch (primType) { - case kPrimitiveTypeQuadList: - *count = *count / 4 * 6; - return quadListPrimConverter; - - case kPrimitiveTypeQuadStrip: - *count = *count / 4 * 6; - return quadStripPrimConverter; - - default: - util::unreachable(); - } -} - -static bool isPrimRequiresConversion(PrimitiveType primType) { - switch (primType) { - case kPrimitiveTypePointList: - case kPrimitiveTypeLineList: - case kPrimitiveTypeLineStrip: - case kPrimitiveTypeTriList: - case kPrimitiveTypeTriFan: - case kPrimitiveTypeTriStrip: - case kPrimitiveTypePatch: - case kPrimitiveTypeLineListAdjacency: - case kPrimitiveTypeLineStripAdjacency: - case kPrimitiveTypeTriListAdjacency: - case kPrimitiveTypeTriStripAdjacency: - return false; - case kPrimitiveTypeLineLoop: // FIXME - util::unreachable(); - return false; - - case kPrimitiveTypeRectList: - return false; // handled by geometry shader - - case kPrimitiveTypeQuadList: - case kPrimitiveTypeQuadStrip: - case kPrimitiveTypePolygon: - return true; - - default: - util::unreachable("prim type: %u\n", (unsigned)primType); - } -} - -static bool validateSpirv(const std::vector &bin) { - spv_target_env target_env = SPV_ENV_VULKAN_1_3; - spv_context spvContext = spvContextCreate(target_env); - spv_diagnostic diagnostic = nullptr; - spv_const_binary_t binary = {bin.data(), bin.size()}; - spv_result_t error = spvValidate(spvContext, &binary, &diagnostic); - if (error != 0) - spvDiagnosticPrint(diagnostic); - spvDiagnosticDestroy(diagnostic); - spvContextDestroy(spvContext); - return error == 0; -} - -static void printSpirv(const std::vector &bin) { -#ifndef NDEBUG - spv_target_env target_env = SPV_ENV_VULKAN_1_3; - spv_context spvContext = spvContextCreate(target_env); - spv_diagnostic diagnostic = nullptr; - - spv_result_t error = spvBinaryToText( - spvContext, bin.data(), bin.size(), - SPV_BINARY_TO_TEXT_OPTION_PRINT | // SPV_BINARY_TO_TEXT_OPTION_COLOR | - // SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES - // | - SPV_BINARY_TO_TEXT_OPTION_COMMENT | SPV_BINARY_TO_TEXT_OPTION_INDENT, - nullptr, &diagnostic); - - if (error != 0) { - spvDiagnosticPrint(diagnostic); - } - - spvDiagnosticDestroy(diagnostic); - spvContextDestroy(spvContext); - - if (error != 0) { - return; - } - - // spirv_cross::CompilerGLSL glsl(bin); - // spirv_cross::CompilerGLSL::Options options; - // options.version = 460; - // options.es = false; - // options.vulkan_semantics = true; - // glsl.set_common_options(options); - // std::printf("%s\n", glsl.compile().c_str()); -#endif -} - -static std::optional> -optimizeSpirv(std::span spirv) { - spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3); - optimizer.RegisterPerformancePasses(); - optimizer.RegisterPass(spvtools::CreateSimplificationPass()); - - std::vector result; - if (optimizer.Run(spirv.data(), spirv.size(), &result)) { - return result; - } - - util::unreachable(); - return {}; -} - -static VkShaderStageFlagBits shaderStageToVk(amdgpu::shader::Stage stage) { - switch (stage) { - case amdgpu::shader::Stage::None: - break; - case amdgpu::shader::Stage::Fragment: - return VK_SHADER_STAGE_FRAGMENT_BIT; - case amdgpu::shader::Stage::Vertex: - return VK_SHADER_STAGE_VERTEX_BIT; - case amdgpu::shader::Stage::Geometry: - return VK_SHADER_STAGE_GEOMETRY_BIT; - case amdgpu::shader::Stage::Compute: - return VK_SHADER_STAGE_COMPUTE_BIT; - } - - return VK_SHADER_STAGE_ALL; -} - -static vk::MemoryResource hostVisibleMemory; -static vk::MemoryResource deviceLocalMemory; - -static vk::MemoryResource &getHostVisibleMemory() { - if (!hostVisibleMemory) { - hostVisibleMemory.initHostVisible(1024 * 1024 * 512); - } - - return hostVisibleMemory; -} - -static vk::MemoryResource &getDeviceLocalMemory() { - if (!deviceLocalMemory) { - deviceLocalMemory.initDeviceLocal(1024 * 1024 * 512); - } - - return deviceLocalMemory; -} - -static std::uint64_t nextImageId = 0; -static void saveImage(const char *name, vk::Image2D &image) { - vk::ImageRef imageRef(image); - vk::Image2D transferImage(imageRef.getWidth(), imageRef.getHeight(), - VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL | - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - auto transferImageMemory = - vk::DeviceMemory::Allocate(transferImage.getMemoryRequirements(), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - - transferImage.bindMemory(vk::DeviceMemoryRef{ - .deviceMemory = transferImageMemory.getHandle(), - .offset = 0, - .size = transferImageMemory.getSize(), - }); - - auto transferImageRef = vk::ImageRef(transferImage); - - auto imageSize = transferImageRef.getMemoryRequirements().size; - - auto transferBuffer = vk::Buffer::Allocate( - getHostVisibleMemory(), imageSize, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - auto taskChain = TaskChain::Create(); - auto blitTask = taskChain->add( - ProcessQueue::Graphics, - [&, transferBuffer = transferBuffer.getHandle(), - imageRef = vk::ImageRef(image)](VkCommandBuffer commandBuffer) mutable { - imageRef.transitionLayout(commandBuffer, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - VkImageBlit region{ - .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffsets = {{}, - {static_cast(imageRef.getWidth()), - static_cast(imageRef.getHeight()), 1}}, - .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffsets = {{}, - {static_cast(imageRef.getWidth()), - static_cast(imageRef.getHeight()), 1}}, - }; - - transferImageRef.transitionLayout(commandBuffer, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - vkCmdBlitImage(commandBuffer, imageRef.getHandle(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - transferImage.getHandle(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, - VK_FILTER_NEAREST); - - transferImageRef.transitionLayout(commandBuffer, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - transferImageRef.writeToBuffer(commandBuffer, transferBuffer, - VK_IMAGE_ASPECT_COLOR_BIT); - imageRef.transitionLayout(commandBuffer, VK_IMAGE_LAYOUT_GENERAL); - }); - taskChain->add(blitTask, [&, name = std::string(name)] { - std::ofstream file(name, std::ios::out | std::ios::binary); - auto data = (unsigned int *)transferBuffer.getData(); - - file << "P6\n" - << transferImageRef.getWidth() << "\n" - << transferImageRef.getHeight() << "\n" - << 255 << "\n"; - - for (uint32_t y = 0; y < transferImageRef.getHeight(); y++) { - for (uint32_t x = 0; x < transferImageRef.getWidth(); x++) { - file.write((char *)data, 3); - data++; - } - } - }); - - taskChain->wait(); -} - -struct BufferRef { - VkBuffer buffer = VK_NULL_HANDLE; - VkDeviceSize offset = 0; - VkDeviceSize size = 0; -}; - -static constexpr bool isAligned(std::uint64_t offset, std::uint64_t alignment) { - return (offset & (alignment - 1)) == 0; -} - -static void -fillStageBindings(std::vector &bindings, - shader::Stage stage) { - for (std::size_t i = 0; i < shader::UniformBindings::kBufferSlots; ++i) { - auto binding = shader::UniformBindings::getBufferBinding(stage, i); - bindings[binding] = VkDescriptorSetLayoutBinding{ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = shaderStageToVk(stage), - .pImmutableSamplers = nullptr}; - } - - for (std::size_t i = 0; i < shader::UniformBindings::kImageSlots; ++i) { - auto binding = shader::UniformBindings::getImageBinding(stage, i); - bindings[binding] = VkDescriptorSetLayoutBinding{ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = 1, - .stageFlags = shaderStageToVk(stage), - .pImmutableSamplers = nullptr}; - } - - for (std::size_t i = 0; i < shader::UniformBindings::kSamplerSlots; ++i) { - auto binding = shader::UniformBindings::getSamplerBinding(stage, i); - bindings[binding] = VkDescriptorSetLayoutBinding{ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .descriptorCount = 1, - .stageFlags = shaderStageToVk(stage), - .pImmutableSamplers = nullptr}; - } - - for (std::size_t i = 0; i < shader::UniformBindings::kStorageImageSlots; - ++i) { - auto binding = shader::UniformBindings::getStorageImageBinding(stage, i); - bindings[binding] = VkDescriptorSetLayoutBinding{ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = shaderStageToVk(stage), - .pImmutableSamplers = nullptr}; - } -} -static std::pair getGraphicsLayout() { - static std::pair result{}; - - if (result.first != VK_NULL_HANDLE) { - return result; - } - - std::vector bindings( - shader::UniformBindings::kStageSize * 2); - - for (auto stage : {shader::Stage::Vertex, shader::Stage::Fragment}) { - fillStageBindings(bindings, stage); - } - - VkDescriptorSetLayoutCreateInfo descLayoutInfo{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }; - - Verify() << vkCreateDescriptorSetLayout(vk::g_vkDevice, &descLayoutInfo, - vk::g_vkAllocator, &result.first); - - VkPipelineLayoutCreateInfo piplineLayoutInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &result.first, - }; - - Verify() << vkCreatePipelineLayout(vk::g_vkDevice, &piplineLayoutInfo, - vk::g_vkAllocator, &result.second); - - return result; -} - -static std::pair getComputeLayout() { - static std::pair result{}; - - if (result.first != VK_NULL_HANDLE) { - return result; - } - - std::vector bindings( - shader::UniformBindings::kStageSize); - - fillStageBindings(bindings, shader::Stage::Compute); - - VkDescriptorSetLayoutCreateInfo layoutInfo{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }; - - Verify() << vkCreateDescriptorSetLayout(vk::g_vkDevice, &layoutInfo, nullptr, - &result.first); - - VkPipelineLayoutCreateInfo piplineLayoutInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &result.first, - }; - - Verify() << vkCreatePipelineLayout(vk::g_vkDevice, &piplineLayoutInfo, - vk::g_vkAllocator, &result.second); - return result; -} - -struct ShaderKey { - std::uint64_t address; - std::uint16_t dimX; - std::uint16_t dimY; - std::uint16_t dimZ; - shader::Stage stage; - std::uint8_t userSgprCount; - std::uint32_t userSgprs[16]; - - auto operator<=>(const ShaderKey &other) const { - auto result = address <=> other.address; - if (result != std::strong_ordering::equal) { - return result; - } - - result = dimX <=> other.dimX; - if (result != std::strong_ordering::equal) { - return result; - } - - result = dimY <=> other.dimY; - if (result != std::strong_ordering::equal) { - return result; - } - - result = dimZ <=> other.dimZ; - if (result != std::strong_ordering::equal) { - return result; - } - - result = stage <=> other.stage; - if (result != std::strong_ordering::equal) { - return result; - } - - result = userSgprCount <=> other.userSgprCount; - if (result != std::strong_ordering::equal) { - return result; - } - - for (std::size_t i = 0; i < std::size(userSgprs); ++i) { - if (i >= userSgprCount) { - break; - } - - result = userSgprs[i] <=> other.userSgprs[i]; - if (result != std::strong_ordering::equal) { - return result; - } - } - - return result; - } -}; - -struct CachedShader { - std::map> cachedData; - shader::Shader info; - VkShaderEXT shader; - - ~CachedShader() { - _vkDestroyShaderEXT(vk::g_vkDevice, shader, vk::g_vkAllocator); - } -}; - -struct CacheOverlayBase; -struct CacheBufferOverlay; -struct CacheImageOverlay; - -struct CacheSyncEntry { - std::uint64_t tag; - Ref overlay; - - auto operator<=>(const CacheSyncEntry &) const = default; -}; - -enum class CacheMode { None, AsyncWrite, LazyWrite }; - -struct CacheOverlayBase { - std::mutex mtx; - RemoteMemory memory; - Ref writeBackTaskCtl; - std::function unlockMutableTask; - std::uint64_t lockTag = 0; - std::uint64_t lockCount = 0; - shader::AccessOp lockOp = shader::AccessOp::None; - CacheMode cacheMode = CacheMode::None; - util::MemoryTableWithPayload syncState; - - std::atomic refs{0}; - virtual ~CacheOverlayBase() = default; - - void incRef() { refs.fetch_add(1, std::memory_order::relaxed); } - void decRef() { - if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) { - delete this; - } - } - - struct LockInfo { - bool isLocked; - shader::AccessOp prevLockOps; - }; - - LockInfo tryLock(std::uint64_t tag, shader::AccessOp op) { - std::lock_guard lock(mtx); - if (lockTag != tag && lockTag != 0) { - return {false, {}}; - } - - lockTag = tag; - ++lockCount; - auto prevLockOps = lockOp; - lockOp |= op; - return {true, prevLockOps}; - } - - void unlock(std::uint64_t tag) { - Ref waitTask; - - { - std::lock_guard lock(mtx); - if (lockTag != tag) { - util::unreachable(); - } - - if (--lockCount != 0) { - return; - } - - release(tag); - lockTag = 0; - auto result = lockOp; - lockOp = shader::AccessOp::None; - - if ((result & shader::AccessOp::Store) == shader::AccessOp::Store) { - if (unlockMutableTask) { - unlockMutableTask(); - unlockMutableTask = nullptr; - } - - if (writeBackTaskCtl) { - getCpuScheduler().enqueue(writeBackTaskCtl); - if (cacheMode == CacheMode::None) { - waitTask = std::move(writeBackTaskCtl); - writeBackTaskCtl = nullptr; - } - } - } - } - - if (waitTask) { - waitTask->wait(); - } - } - - virtual void release(std::uint64_t tag) {} - - struct SyncTag { - std::uint64_t beginAddress; - std::uint64_t endAddress; - std::uint64_t value; - }; - - std::optional getSyncTag(std::uint64_t address, std::uint64_t size) { - std::lock_guard lock(mtx); - auto it = syncState.queryArea(address); - if (it == syncState.end()) { - return {}; - } - - if (it.endAddress() < address + size || it.beginAddress() > address) { - // has no single sync state - return {}; - } - - return SyncTag{ - .beginAddress = it.beginAddress(), - .endAddress = it.endAddress(), - .value = it.get(), - }; - } - - bool isInSync(util::MemoryTableWithPayload &table, - std::mutex &tableMutex, std::uint64_t address, - std::uint64_t size) { - auto optSyncTag = getSyncTag(address, size); - if (!optSyncTag) { - return false; - } - - auto syncTag = *optSyncTag; - - std::lock_guard lock(tableMutex); - auto tableArea = table.queryArea(address); - - if (tableArea == table.end()) { - return false; - } - - if (tableArea.beginAddress() > address || - tableArea.endAddress() < address + size) { - return false; - } - - return tableArea->tag == syncTag.value; - } - - virtual void writeBuffer(TaskChain &taskChain, - Ref sourceBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) { - std::printf("cache: unimplemented buffer write to %lx-%lx\n", address, - address + size); - } - - virtual void readBuffer(TaskChain &taskChain, - Ref targetBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) { - std::printf("cache: unimplemented buffer read from %lx-%lx\n", address, - address + size); - } -}; - -struct CacheEntry { - std::uint64_t beginAddress; - std::uint64_t endAddress; - std::uint64_t tag; - Ref overlay; -}; - -struct CacheBufferOverlay : CacheOverlayBase { - vk::Buffer buffer; - std::uint64_t bufferAddress; - - void read(TaskChain &taskChain, - util::MemoryTableWithPayload &table, - std::mutex &tableMtx, std::uint64_t address, - std::uint32_t elementCount, std::uint32_t stride, - std::uint32_t elementSize, bool cache, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId, - bool tableLocked = false) { - std::lock_guard lock(mtx); - auto size = stride == 0 - ? static_cast(elementCount) * elementSize - : static_cast(elementCount) * stride; - auto doRead = [&](std::uint64_t address, std::uint64_t size, - std::uint64_t tag, Ref overlay) { - overlay->readBuffer(taskChain, this, address, size, waitTask); - syncState.map(address, address + size, tag); - }; - - auto getAreaInfo = [&](std::uint64_t address) { - if (tableLocked) { - auto it = table.queryArea(address); - if (it == table.end()) { - util::unreachable(); - } - - return CacheEntry{ - .beginAddress = it.beginAddress(), - .endAddress = it.endAddress(), - .tag = it->tag, - .overlay = it->overlay, - }; - } - - std::lock_guard lock(tableMtx); - auto it = table.queryArea(address); - if (it == table.end()) { - util::unreachable(); - } - return CacheEntry{ - .beginAddress = it.beginAddress(), - .endAddress = it.endAddress(), - .tag = it->tag, - .overlay = it->overlay, - }; - }; - - while (size > 0) { - auto state = getAreaInfo(address); - - assert(state.endAddress > address); - auto origAreaSize = std::min(state.endAddress - address, size); - auto areaSize = origAreaSize; - - if (!cache) { - state.overlay->readBuffer(taskChain, this, address, areaSize, waitTask); - size -= areaSize; - address += areaSize; - continue; - } - - while (areaSize > 0) { - auto blockSyncStateIt = syncState.queryArea(address); - - if (blockSyncStateIt == syncState.end()) { - doRead(address, areaSize, state.tag, state.overlay); - address += areaSize; - break; - } - - auto blockSize = - std::min(blockSyncStateIt.endAddress() - address, areaSize); - - if (blockSyncStateIt.get() != state.tag) { - doRead(address, areaSize, state.tag, state.overlay); - } - - areaSize -= blockSize; - address += blockSize; - } - - size -= origAreaSize; - } - } - - void readBuffer(TaskChain &taskChain, Ref targetBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) override { - auto readTask = [=, self = Ref(this)] { - auto targetOffset = address - targetBuffer->bufferAddress; - auto sourceOffset = address - self->bufferAddress; - std::memcpy((char *)targetBuffer->buffer.getData() + targetOffset, - (char *)self->buffer.getData() + sourceOffset, size); - }; - - if (size < bridge::kHostPageSize && waitTask == GpuTaskLayout::kInvalidId) { - readTask(); - } else { - taskChain.add(waitTask, std::move(readTask)); - } - } -}; - -struct CacheImageOverlay : CacheOverlayBase { - vk::Image2D image; - - vk::Buffer trasferBuffer; // TODO: remove - VkImageView view = VK_NULL_HANDLE; - std::uint32_t dataWidth; - std::uint32_t dataPitch; - std::uint32_t dataHeight; - std::uint8_t bpp; - TileMode tileMode; - VkImageAspectFlags aspect; - Ref usedBuffer; - - ~CacheImageOverlay() { - if (view != VK_NULL_HANDLE) { - vkDestroyImageView(vk::g_vkDevice, view, vk::g_vkAllocator); - } - } - - void release(std::uint64_t tag) override { - if (false && (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { - saveImage(("images/" + std::to_string(nextImageId++) + ".ppm").c_str(), - image); - } - - if (usedBuffer) { - usedBuffer->unlock(tag); - usedBuffer = nullptr; - } - } - - void read(TaskChain &taskChain, std::uint64_t address, - Ref srcBuffer, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) { - if (usedBuffer != nullptr) { - util::unreachable(); - } - - usedBuffer = srcBuffer; - auto offset = address - srcBuffer->bufferAddress; - auto size = dataHeight * dataPitch * bpp; - - if (dataPitch == dataWidth && - (tileMode == kTileModeDisplay_2dThin || - tileMode == kTileModeDisplay_LinearAligned)) { - taskChain.add( - ProcessQueue::Graphics, waitTask, - [=, self = Ref(this)](VkCommandBuffer commandBuffer) { - vk::ImageRef imageRef(self->image); - imageRef.transitionLayout(commandBuffer, VK_IMAGE_LAYOUT_GENERAL); - - VkBufferImageCopy region{ - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = - { - .aspectMask = self->aspect, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {imageRef.getWidth(), imageRef.getHeight(), 1}, - }; - - vkCmdCopyBufferToImage(commandBuffer, srcBuffer->buffer.getHandle(), - self->image.getHandle(), - VK_IMAGE_LAYOUT_GENERAL, 1, ®ion); - auto tag = *srcBuffer->getSyncTag(address, size); - std::lock_guard lock(self->mtx); - self->syncState.map(address, address + size, tag.value); - }); - - return; - } - - auto transferBufferReadId = taskChain.add(waitTask, [=, self = Ref(this)] { - auto bufferData = (char *)srcBuffer->buffer.getData() + offset; - - self->trasferBuffer.readFromImage(bufferData, self->bpp, self->tileMode, - self->dataWidth, self->dataHeight, 1, - self->dataPitch); - }); - - taskChain.add( - ProcessQueue::Graphics, transferBufferReadId, - [=, self = Ref(this)](VkCommandBuffer commandBuffer) { - vk::ImageRef imageRef(self->image); - imageRef.transitionLayout(commandBuffer, VK_IMAGE_LAYOUT_GENERAL); - imageRef.readFromBuffer( - commandBuffer, self->trasferBuffer.getHandle(), self->aspect); - - auto tag = *srcBuffer->getSyncTag(address, size); - std::lock_guard lock(self->mtx); - self->syncState.map(address, address + size, tag.value); - }); - } - - void readBuffer(TaskChain &taskChain, Ref targetBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) override { - auto offset = address - targetBuffer->bufferAddress; - - if (dataPitch == dataWidth && - (tileMode == kTileModeDisplay_2dThin || - tileMode == kTileModeDisplay_LinearAligned)) { - auto linearReadTask = [=, - self = Ref(this)](VkCommandBuffer commandBuffer) { - vk::ImageRef imageRef(self->image); - imageRef.transitionLayout(commandBuffer, VK_IMAGE_LAYOUT_GENERAL); - - VkBufferImageCopy region{ - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = - { - .aspectMask = self->aspect, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {imageRef.getWidth(), imageRef.getHeight(), - imageRef.getDepth()}, - }; - - vkCmdCopyImageToBuffer(commandBuffer, imageRef.getHandle(), - VK_IMAGE_LAYOUT_GENERAL, - targetBuffer->buffer.getHandle(), 1, ®ion); - }; - taskChain.add(ProcessQueue::Graphics, waitTask, - std::move(linearReadTask)); - return; - } - - auto writeToTransferBufferTask = taskChain.add( - ProcessQueue::Graphics, waitTask, - [=, self = Ref(this)](VkCommandBuffer commandBuffer) { - vk::ImageRef imageRef(self->image); - imageRef.writeToBuffer(commandBuffer, self->trasferBuffer.getHandle(), - self->aspect); - }); - - taskChain.add(writeToTransferBufferTask, [=, self = Ref(this)] { - auto targetData = (char *)targetBuffer->buffer.getData() + offset; - self->trasferBuffer.writeAsImageTo(targetData, self->bpp, self->tileMode, - self->dataWidth, self->dataHeight, 1, - self->dataPitch); - }); - } -}; - -struct MemoryOverlay : CacheOverlayBase { - void readBuffer(TaskChain &taskChain, Ref targetBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) override { - auto readTask = [=, this] { - auto offset = address - targetBuffer->bufferAddress; - auto targetData = (char *)targetBuffer->buffer.getData() + offset; - - std::memcpy(targetData, memory.getPointer(address), size); - }; - - if (size < bridge::kHostPageSize && waitTask == GpuTaskLayout::kInvalidId) { - readTask(); - } else { - taskChain.add(waitTask, std::move(readTask)); - } - } - - void - writeBuffer(TaskChain &taskChain, Ref sourceBuffer, - std::uint64_t address, std::uint64_t size, - std::uint64_t waitTask = GpuTaskLayout::kInvalidId) override { - auto writeTask = [=, this] { - auto offset = address - sourceBuffer->bufferAddress; - auto sourceData = (char *)sourceBuffer->buffer.getData() + offset; - - std::memcpy(memory.getPointer(address), sourceData, size); - }; - - if (size < bridge::kHostPageSize && waitTask == GpuTaskLayout::kInvalidId) { - writeTask(); - } else { - taskChain.add(waitTask, std::move(writeTask)); - } - } -}; - -static void notifyPageChanges(int vmId, std::uint32_t firstPage, - std::uint32_t pageCount) { - std::uint64_t command = - (static_cast(pageCount - 1) << 32) | firstPage; - - while (true) { - for (std::size_t i = 0; i < std::size(g_bridge->cacheCommands); ++i) { - std::uint64_t expCommand = 0; - if (g_bridge->cacheCommands[vmId][i].compare_exchange_strong( - expCommand, command, std::memory_order::acquire, - std::memory_order::relaxed)) { - return; - } - } - } -} - -static void modifyWatchFlags(int vmId, std::uint64_t address, - std::uint64_t size, std::uint8_t addFlags, - std::uint8_t removeFlags) { - auto firstPage = address / bridge::kHostPageSize; - auto lastPage = - (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize; - bool hasChanges = false; - for (auto page = firstPage; page < lastPage; ++page) { - auto prevValue = - g_bridge->cachePages[vmId][page].load(std::memory_order::relaxed); - auto newValue = (prevValue & ~removeFlags) | addFlags; - - if (newValue == prevValue) { - continue; - } - - while (!g_bridge->cachePages[vmId][page].compare_exchange_weak( - prevValue, newValue, std::memory_order::relaxed)) { - newValue = (prevValue & ~removeFlags) | addFlags; - } - - if (newValue != prevValue) { - hasChanges = true; - } - } - - if (hasChanges) { - notifyPageChanges(vmId, firstPage, lastPage - firstPage); - } -} - -static void watchWrites(int vmId, std::uint64_t address, std::uint64_t size) { - modifyWatchFlags(vmId, address, size, bridge::kPageWriteWatch, - bridge::kPageInvalidated); -} -static void lockReadWrite(int vmId, std::uint64_t address, std::uint64_t size, - bool isLazy) { - modifyWatchFlags(vmId, address, size, - bridge::kPageReadWriteLock | - (isLazy ? bridge::kPageLazyLock : 0), - bridge::kPageInvalidated); -} -static void unlockReadWrite(int vmId, std::uint64_t address, - std::uint64_t size) { - modifyWatchFlags(vmId, address, size, bridge::kPageWriteWatch, - bridge::kPageReadWriteLock | bridge::kPageLazyLock); -} - -struct CacheLine { - std::uint64_t areaAddress; - std::uint64_t areaSize; - - Ref memoryOverlay; - - // TODO: flat image storage - struct ImageKey { - std::uint64_t address; - SurfaceFormat dataFormat; - TextureChannelType channelType; - TileMode tileMode; - std::uint32_t width; - std::uint32_t height; - std::uint32_t depth; - std::uint32_t pitch; - bool isStorage; - - auto operator<=>(const ImageKey &other) const = default; - }; - - RemoteMemory memory; - std::mutex hostSyncMtx; - util::MemoryTableWithPayload hostSyncTable; - - std::mutex bufferTableMtx; - std::unordered_map>> - bufferTable; - - std::mutex imageTableMtx; - std::map> imageTable; - - std::mutex writeBackTableMtx; - util::MemoryTableWithPayload> writeBackTable; - - CacheLine(RemoteMemory memory, std::uint64_t areaAddress, - std::uint64_t areaSize) - : memory(memory), areaAddress(areaAddress), areaSize(areaSize) { - memoryOverlay = new MemoryOverlay(); - memoryOverlay->memory = memory; - hostSyncTable.map(areaAddress, areaAddress + areaSize, {1, memoryOverlay}); - } - - void markHostInvalidated(std::uint64_t tag, std::uint64_t address, - std::uint64_t size) { - std::scoped_lock lock(hostSyncMtx, memoryOverlay->mtx); - - hostSyncTable.map(address, address + size, {tag, memoryOverlay}); - memoryOverlay->syncState.map(address, address + size, tag); - } - - bool handleHostInvalidations(std::uint64_t tag, std::uint64_t address, - std::uint64_t size) { - auto firstPage = address / bridge::kHostPageSize; - auto lastPage = - (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize; - - bool hasInvalidations = false; - - for (auto page = firstPage; page < lastPage; ++page) { - auto prevValue = g_bridge->cachePages[memory.vmId][page].load( - std::memory_order::relaxed); - - if (~prevValue & bridge::kPageInvalidated) { - continue; - } - - while (!g_bridge->cachePages[memory.vmId][page].compare_exchange_weak( - prevValue, prevValue & ~bridge::kPageInvalidated, - std::memory_order::relaxed)) { - } - - markHostInvalidated(tag, page * bridge::kHostPageSize, - bridge::kHostPageSize); - hasInvalidations = true; - } - - return hasInvalidations; - } - - void trackCacheRead(std::uint64_t address, std::uint64_t size) { - watchWrites(memory.vmId, address, size); - } - - void setWriteBackTask(std::uint64_t address, std::uint64_t size, - Ref task) { - std::lock_guard lock(writeBackTableMtx); - auto it = writeBackTable.queryArea(address); - - while (it != writeBackTable.end()) { - if (it.beginAddress() >= address + size) { - break; - } - - auto task = it.get(); - - if (it.beginAddress() >= address && it.endAddress() <= address + size) { - if (task != nullptr) { - // another task with smaller range already in progress, we can - // cancel it - - // std::printf("prev upload task cancelation\n"); - task->cancel(); - } - } - - if (task != nullptr) { - task->wait(); - } - - ++it; - } - - writeBackTable.map(address, address + size, std::move(task)); - } - - std::atomic writeBackTag{1}; - - void lazyMemoryUpdate(std::uint64_t tag, std::uint64_t address) { - // std::printf("memory lazy update, address %lx\n", address); - - std::size_t beginAddress; - std::size_t areaSize; - { - std::lock_guard lock(hostSyncMtx); - auto it = hostSyncTable.queryArea(address); - - if (it == hostSyncTable.end()) { - util::unreachable(); - } - - beginAddress = it.beginAddress(); - areaSize = it.size(); - } - - auto updateTaskChain = TaskChain::Create(); - auto uploadBuffer = getBuffer(tag, *updateTaskChain.get(), beginAddress, - areaSize, 1, 1, shader::AccessOp::Load); - memoryOverlay->writeBuffer(*updateTaskChain.get(), uploadBuffer, - beginAddress, areaSize); - updateTaskChain->wait(); - uploadBuffer->unlock(tag); - unlockReadWrite(memory.vmId, beginAddress, areaSize); - // std::printf("memory lazy update, %lx finish\n", address); - } - - void trackCacheWrite(std::uint64_t address, std::uint64_t size, - std::uint64_t tag, Ref entry) { - - entry->unlockMutableTask = [=, this] { - if (entry->cacheMode != CacheMode::None) { - lockReadWrite(memory.vmId, address, size, - entry->cacheMode == CacheMode::LazyWrite); - entry->syncState.map(address, address + size, tag); - - std::lock_guard lock(hostSyncMtx); - hostSyncTable.map(address, address + size, - {.tag = tag, .overlay = entry}); - } else { - std::lock_guard lock(hostSyncMtx); - hostSyncTable.map(address, address + size, - {.tag = tag, .overlay = memoryOverlay}); - } - }; - - if (entry->cacheMode != CacheMode::LazyWrite) { - auto writeBackTask = createCpuTask([=, this]( - const AsyncTaskCtl &ctl) mutable { - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } - - auto taskChain = TaskChain::Create(); - Ref uploadBuffer; - auto tag = writeBackTag.fetch_add(1, std::memory_order::relaxed); - - if (entry->cacheMode == CacheMode::None) { - uploadBuffer = static_cast(entry.get()); - if (!uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked) { - taskChain->add([&] { - return uploadBuffer->tryLock(tag, shader::AccessOp::None).isLocked - ? TaskResult::Complete - : TaskResult::Reschedule; - }); - } - } else { - uploadBuffer = getBuffer(tag, *taskChain.get(), address, size, 1, 1, - shader::AccessOp::Load); - } - taskChain->wait(); - - if (ctl.isCancelRequested()) { - uploadBuffer->unlock(tag); - return TaskResult::Canceled; - } - - memoryOverlay->writeBuffer(*taskChain.get(), uploadBuffer, address, - size); - uploadBuffer->unlock(tag); - - if (ctl.isCancelRequested()) { - return TaskResult::Canceled; - } - - taskChain->wait(); - - if (entry->cacheMode != CacheMode::None) { - unlockReadWrite(memory.vmId, address, size); - } - return TaskResult::Complete; - }); - - { - std::lock_guard lock(entry->mtx); - entry->writeBackTaskCtl = writeBackTask; - } - setWriteBackTask(address, size, std::move(writeBackTask)); - } - } - - Ref - getBuffer(std::uint64_t tag, TaskChain &initTaskSet, std::uint64_t address, - std::uint32_t elementCount, std::uint32_t stride, - std::uint32_t elementSize, shader::AccessOp access) { - auto size = stride == 0 - ? static_cast(elementCount) * elementSize - : static_cast(elementCount) * stride; - - auto result = getBufferInternal(address, size); - - if (auto [isLocked, prevLockAccess] = result->tryLock(tag, access); - isLocked) { - initLockedBuffer(result, tag, initTaskSet, address, elementCount, stride, - elementSize, access & ~prevLockAccess); - return result; - } - - auto lockTaskId = initTaskSet.createExternalTask(); - auto waitForLockTask = createCpuTask([=, this, - initTaskSet = Ref(&initTaskSet)] { - auto [isLocked, prevLockAccess] = result->tryLock(tag, access); - if (!isLocked) { - return TaskResult::Reschedule; - } - - auto initTaskChain = TaskChain::Create(); - initLockedBuffer(result, tag, *initTaskChain.get(), address, elementCount, - stride, elementSize, access & ~prevLockAccess); - - initTaskChain->wait(); - initTaskSet->notifyExternalTaskComplete(lockTaskId); - return TaskResult::Complete; - }); - - getCpuScheduler().enqueue(std::move(waitForLockTask)); - return result; - } - - Ref - getImage(std::uint64_t tag, TaskChain &initTaskChain, std::uint64_t address, - SurfaceFormat dataFormat, TextureChannelType channelType, - TileMode tileMode, std::uint32_t width, std::uint32_t height, - std::uint32_t depth, std::uint32_t pitch, int selX, int selY, - int selZ, int selW, shader::AccessOp access, bool isColor, - bool isStorage) { - auto result = getImageInternal(address, dataFormat, channelType, tileMode, - width, height, depth, pitch, selX, selY, - selZ, selW, isColor, isStorage); - - auto size = result->bpp * result->dataHeight * result->dataPitch; - - if (auto [isLocked, prevLockAccess] = result->tryLock(tag, access); - isLocked) { - initLockedImage(result, tag, initTaskChain, address, size, - access & ~prevLockAccess); - return result; - } - - auto lockTaskId = initTaskChain.createExternalTask(); - auto waitForLockTask = - createCpuTask([=, this, pipelineInitChain = Ref(&initTaskChain)] { - auto [isLocked, prevLockAccess] = result->tryLock(tag, access); - if (!isLocked) { - return TaskResult::Reschedule; - } - - auto initTaskChain = TaskChain::Create(); - initLockedImage(result, tag, *initTaskChain.get(), address, size, - access & ~prevLockAccess); - - initTaskChain->wait(); - pipelineInitChain->notifyExternalTaskComplete(lockTaskId); - return TaskResult::Complete; - }); - - getCpuScheduler().enqueue(std::move(waitForLockTask)); - return result; - } - -private: - void initLockedImage(Ref result, std::uint64_t writeTag, - TaskChain &initTaskChain, std::uint64_t address, - std::uint64_t size, shader::AccessOp access) { - auto cacheBeginAddress = - (address + bridge::kHostPageSize - 1) & ~(bridge::kHostPageSize - 1); - auto cacheEndAddress = (address + size) & ~(bridge::kHostPageSize - 1); - - if (cacheBeginAddress == cacheEndAddress) { - cacheBeginAddress = address; - cacheEndAddress = address + size; - } - - auto cacheSize = cacheEndAddress - cacheBeginAddress; - - if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) { - if (result->writeBackTaskCtl) { - result->writeBackTaskCtl->cancel(); - result->writeBackTaskCtl->wait(); - } - } - - if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) { - if (handleHostInvalidations(writeTag - 1, cacheBeginAddress, cacheSize) || - !result->isInSync(hostSyncTable, hostSyncMtx, address, size)) { - auto buffer = getBuffer(writeTag, initTaskChain, address, size, 0, 1, - shader::AccessOp::Load); - auto bufferInitTask = initTaskChain.getLastTaskId(); - - result->read(initTaskChain, address, std::move(buffer), bufferInitTask); - trackCacheRead(cacheBeginAddress, cacheSize); - } - } - - if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) { - trackCacheWrite(address, size, writeTag, result); - } - } - - void initLockedBuffer(Ref result, std::uint64_t writeTag, - TaskChain &readTaskSet, std::uint64_t address, - std::uint32_t elementCount, std::uint32_t stride, - std::uint32_t elementSize, shader::AccessOp access) { - auto size = stride == 0 - ? static_cast(elementCount) * elementSize - : static_cast(elementCount) * stride; - - if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) { - if (result->writeBackTaskCtl) { - result->writeBackTaskCtl->cancel(); - result->writeBackTaskCtl->wait(); - } - } - - if ((access & shader::AccessOp::Load) == shader::AccessOp::Load) { - if (result->cacheMode == CacheMode::None || - handleHostInvalidations(writeTag - 1, address, size) || - !result->isInSync(hostSyncTable, hostSyncMtx, address, size)) { - result->read(readTaskSet, hostSyncTable, hostSyncMtx, address, - elementCount, stride, elementSize, - result->cacheMode != CacheMode::None); - - if (result->cacheMode != CacheMode::None) { - // std::printf("caching %lx-%lx\n", address, size); - trackCacheRead(address, size); - } - } - } - - if ((access & shader::AccessOp::Store) == shader::AccessOp::Store) { - trackCacheWrite(address, size, writeTag, result); - } - } - - Ref getBufferInternal(std::uint64_t address, - std::uint64_t size) { - auto alignment = - vk::g_physicalDeviceProperties.limits.minStorageBufferOffsetAlignment; - - if (address + size > areaAddress + areaSize) { - util::unreachable(); - } - - auto offset = (address - areaAddress) & (alignment - 1); - - std::lock_guard lock(bufferTableMtx); - auto &table = bufferTable[offset]; - - if (auto it = table.queryArea(address); it != table.end()) { - if (it.beginAddress() <= address && it.endAddress() >= address + size) { - if (!isAligned(address - it.beginAddress(), alignment)) { - util::unreachable(); - } - - return it.get(); - } - - assert(it.beginAddress() <= address); - - auto endAddress = std::max(it.endAddress(), address + size); - address = it.beginAddress(); - - while (it != table.end()) { - if (endAddress > it.endAddress()) { - auto nextIt = it; - if (++nextIt != table.end()) { - if (nextIt.beginAddress() >= endAddress) { - break; - } - endAddress = nextIt.endAddress(); - } - } - ++it; - } - - size = endAddress - address; - } - - auto bufferOverlay = new CacheBufferOverlay(); - bufferOverlay->memory = memory; - bufferOverlay->buffer = vk::Buffer::Allocate( - getHostVisibleMemory(), size, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT); - bufferOverlay->bufferAddress = address; - bufferOverlay->cacheMode = - size >= 3 * bridge::kHostPageSize - ? CacheMode::LazyWrite - : (size >= bridge::kHostPageSize ? CacheMode::AsyncWrite - : CacheMode::None); - - table.map(address, address + size, bufferOverlay); - return bufferOverlay; - } - - Ref - getImageInternal(std::uint64_t address, SurfaceFormat dataFormat, - TextureChannelType channelType, TileMode tileMode, - std::uint32_t width, std::uint32_t height, - std::uint32_t depth, std::uint32_t pitch, int selX, int selY, - int selZ, int selW, bool isColor, bool isStorage) { - ImageKey key{ - .address = address, - .dataFormat = dataFormat, - .channelType = channelType, - .tileMode = tileMode, - .width = width, - .height = height, - .depth = depth, - .pitch = pitch, - .isStorage = isStorage, - }; - - decltype(imageTable)::iterator it; - { - std::lock_guard lock(imageTableMtx); - - auto [emplacedIt, inserted] = - imageTable.try_emplace(key, Ref{}); - - if (!inserted) { - return emplacedIt->second; - } - - it = emplacedIt; - } - - std::printf( - "Image cache miss: address: %lx, dataFormat: %u, channelType: %u, " - "tileMode: %u, width: %u, height: %u, depth: %u, pitch: %u\n", - address, dataFormat, channelType, tileMode, width, height, depth, - pitch); - - auto colorFormat = surfaceFormatToVkFormat(dataFormat, channelType); - auto usage = - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - - if (isStorage) { - usage |= VK_IMAGE_USAGE_STORAGE_BIT; - } else { - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - } - - bool isCompressed = - dataFormat == kSurfaceFormatBc1 || dataFormat == kSurfaceFormatBc2 || - dataFormat == kSurfaceFormatBc3 || dataFormat == kSurfaceFormatBc4 || - dataFormat == kSurfaceFormatBc5 || dataFormat == kSurfaceFormatBc6 || - dataFormat == kSurfaceFormatBc7; - if (!isCompressed) { - if (isColor) { - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - } else { - usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - } - } - - if (isStorage) { - if (colorFormat == VK_FORMAT_R8G8B8A8_SRGB) { - colorFormat = VK_FORMAT_R8G8B8A8_UNORM; - } - } - - auto newOverlay = new CacheImageOverlay(); - newOverlay->memory = memory; - - newOverlay->image = vk::Image2D::Allocate(getDeviceLocalMemory(), width, - height, colorFormat, usage); - - auto bpp = getBitWidthOfSurfaceFormat(dataFormat) / 8; - - std::uint32_t dataWidth = width; - std::uint32_t dataPitch = pitch; - std::uint32_t dataHeight = height; - - /*if (dataFormat == kSurfaceFormatBc1) { - width = (width + 7) / 8; - height = (height + 7) / 8; - pitch = (pitch + 7) / 8; - bpp = 8; - } else */ - if (isCompressed) { - dataWidth = (width + 3) / 4; - dataPitch = (pitch + 3) / 4; - dataHeight = (height + 3) / 4; - bpp = 16; - } - - auto memSize = vk::ImageRef(newOverlay->image).getMemoryRequirements().size; - - newOverlay->trasferBuffer = vk::Buffer::Allocate( - getHostVisibleMemory(), memSize, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - - newOverlay->dataWidth = dataWidth; - newOverlay->dataPitch = dataPitch; - newOverlay->dataHeight = dataHeight; - newOverlay->bpp = bpp; - newOverlay->tileMode = tileMode; - - VkImageViewCreateInfo viewInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = newOverlay->image.getHandle(), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = colorFormat, - .components = {}, - .subresourceRange = - { - .aspectMask = static_cast( - isColor ? VK_IMAGE_ASPECT_COLOR_BIT - : VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT), - .baseMipLevel = 0, // TODO - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - if (isColor) { - auto selToVkSwizzle = [](int sel) { - switch (sel) { - case 0: - return VK_COMPONENT_SWIZZLE_ZERO; - case 1: - return VK_COMPONENT_SWIZZLE_ONE; - case 4: - return VK_COMPONENT_SWIZZLE_R; - case 5: - return VK_COMPONENT_SWIZZLE_G; - case 6: - return VK_COMPONENT_SWIZZLE_B; - case 7: - return VK_COMPONENT_SWIZZLE_A; - } - util::unreachable("unknown channel swizzle %u\n", sel); - }; - - viewInfo.components = { - .r = selToVkSwizzle(selZ), - .g = selToVkSwizzle(selY), - .b = selToVkSwizzle(selX), - .a = selToVkSwizzle(selW), - }; - } - - Verify() << vkCreateImageView(vk::g_vkDevice, &viewInfo, nullptr, - &newOverlay->view); - - newOverlay->aspect = - isColor ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; - newOverlay->cacheMode = memSize >= bridge::kHostPageSize * 3 - ? CacheMode::LazyWrite - : CacheMode::AsyncWrite; - it->second = newOverlay; - return it->second; - } -}; - -struct Cache { - // TODO: use descriptor buffer instead - VkDescriptorPool graphicsDescriptorPool{}; - VkDescriptorPool computeDescriptorPool{}; - std::vector graphicsDecsriptorSets; - std::vector computeDecsriptorSets; - - struct DetachedImageKey { - SurfaceFormat dataFormat; - TextureChannelType channelType; - std::uint32_t width; - std::uint32_t height; - std::uint32_t depth; - - auto operator<=>(const DetachedImageKey &other) const = default; - }; - - RemoteMemory memory; - std::map samplers; - std::map> datachedImages; - std::map> cacheLines; - std::atomic nextTag{2}; - std::map> shaders; - - std::mutex mtx; - - Cache(int vmId) : memory({vmId}) { - getCpuScheduler().enqueue([this, vmId] { - auto page = - g_bridge->gpuCacheCommand[vmId].load(std::memory_order::relaxed); - if (page == 0) { - return TaskResult::Reschedule; - } - - g_bridge->gpuCacheCommand[vmId].store(0, std::memory_order::relaxed); - auto address = static_cast(page) * bridge::kHostPageSize; - - auto &line = getLine(address, bridge::kHostPageSize); - line.lazyMemoryUpdate(createTag(), address); - return TaskResult::Reschedule; - }); - } - - void clear() { - vkDestroyDescriptorPool(vk::g_vkDevice, graphicsDescriptorPool, - vk::g_vkAllocator); - vkDestroyDescriptorPool(vk::g_vkDevice, computeDescriptorPool, - vk::g_vkAllocator); - for (auto &[s, handle] : samplers) { - vkDestroySampler(vk::g_vkDevice, handle, vk::g_vkAllocator); - } - graphicsDescriptorPool = VK_NULL_HANDLE; - computeDescriptorPool = VK_NULL_HANDLE; - samplers.clear(); - - graphicsDecsriptorSets.clear(); - computeDecsriptorSets.clear(); - datachedImages.clear(); - cacheLines.clear(); - nextTag = 2; - } - - void syncLines() { - std::lock_guard lock(mtx); - - auto areas = std::exchange(memoryAreaTable[memory.vmId].invalidated, {}); - auto it = cacheLines.begin(); - - if (it == cacheLines.end()) { - return; - } - - for (auto area : areas) { - while (it->first > area) { - if (++it == cacheLines.end()) { - return; - } - } - - if (it->first == area) { - it = cacheLines.erase(it); - - if (it == cacheLines.end()) { - return; - } - } - } - } - - VkDescriptorSet getComputeDescriptorSet() { - { - std::lock_guard lock(mtx); - if (computeDescriptorPool == nullptr) { - VkDescriptorPoolSize poolSizes[]{ - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = shader::UniformBindings::kBufferSlots, - }, - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = shader::UniformBindings::kImageSlots, - }, - { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .descriptorCount = shader::UniformBindings::kSamplerSlots, - }, - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = shader::UniformBindings::kStorageImageSlots, - }, - }; - - VkDescriptorPoolCreateInfo info{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .maxSets = 32, - .poolSizeCount = static_cast(std::size(poolSizes)), - .pPoolSizes = poolSizes, - }; - - Verify() << vkCreateDescriptorPool( - vk::g_vkDevice, &info, vk::g_vkAllocator, &computeDescriptorPool); - } - - if (!computeDecsriptorSets.empty()) { - auto result = computeDecsriptorSets.back(); - computeDecsriptorSets.pop_back(); - return result; - } - } - - auto layout = getComputeLayout().first; - - VkDescriptorSetAllocateInfo info{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = computeDescriptorPool, - .descriptorSetCount = 1, - .pSetLayouts = &layout, - }; - - VkDescriptorSet result; - Verify() << vkAllocateDescriptorSets(vk::g_vkDevice, &info, &result); - return result; - } - - std::uint64_t createTag() { return nextTag.fetch_add(2); } - - VkSampler getSampler(const GnmSSampler &ssampler) { - std::lock_guard lock(mtx); - auto [it, inserted] = samplers.try_emplace(ssampler, VK_NULL_HANDLE); - - if (!inserted) { - return it->second; - } - - auto clampToVkAddressMode = [](int clamp) { - switch (clamp) { - case 0: - return VK_SAMPLER_ADDRESS_MODE_REPEAT; - case 1: - return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - case 2: - return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - case 4: - return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - } - return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - }; - - VkSamplerCreateInfo samplerInfo{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, - .addressModeU = clampToVkAddressMode(ssampler.clamp_x), - .addressModeV = clampToVkAddressMode(ssampler.clamp_y), - .addressModeW = clampToVkAddressMode(ssampler.clamp_z), - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 1.0, - .compareOp = (VkCompareOp)ssampler.depth_compare_func, - .minLod = 0.f, - .maxLod = 1.f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE, - }; - - Verify() << vkCreateSampler(vk::g_vkDevice, &samplerInfo, nullptr, - &it->second); - return it->second; - } - - VkDescriptorSet getGraphicsDescriptorSet() { - { - std::lock_guard lock(mtx); - if (graphicsDescriptorPool == nullptr) { - VkDescriptorPoolSize poolSizes[]{ - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = shader::UniformBindings::kBufferSlots * 2, - }, - { - .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .descriptorCount = shader::UniformBindings::kImageSlots * 2, - }, - { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .descriptorCount = shader::UniformBindings::kSamplerSlots * 2, - }, - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = - shader::UniformBindings::kStorageImageSlots * 2, - }, - }; - - VkDescriptorPoolCreateInfo info{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .maxSets = 32, - .poolSizeCount = static_cast(std::size(poolSizes)), - .pPoolSizes = poolSizes, - }; - - Verify() << vkCreateDescriptorPool( - vk::g_vkDevice, &info, vk::g_vkAllocator, &graphicsDescriptorPool); - } - - if (!graphicsDecsriptorSets.empty()) { - auto result = graphicsDecsriptorSets.back(); - graphicsDecsriptorSets.pop_back(); - return result; - } - } - - auto layout = getGraphicsLayout().first; - - VkDescriptorSetAllocateInfo info{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = graphicsDescriptorPool, - .descriptorSetCount = 1, - .pSetLayouts = &layout, - }; - - VkDescriptorSet result; - Verify() << vkAllocateDescriptorSets(vk::g_vkDevice, &info, &result); - return result; - } - - void releaseGraphicsDescriptorSet(VkDescriptorSet descSet) { - std::lock_guard lock(mtx); - graphicsDecsriptorSets.push_back(descSet); - } - - void releaseComputeDescriptorSet(VkDescriptorSet descSet) { - std::lock_guard lock(mtx); - computeDecsriptorSets.push_back(descSet); - } - - const CachedShader &getShader(TaskSet &taskSet, - VkDescriptorSetLayout descriptorSetLayout, - shader::Stage stage, std::uint64_t address, - std::uint32_t *userSgprs, - std::uint8_t userSgprsCount, - std::uint16_t dimX = 1, std::uint16_t dimY = 1, - std::uint16_t dimZ = 1) { - ShaderKey key{.address = address, - .dimX = dimX, - .dimY = dimY, - .dimZ = dimZ, - .stage = stage, - .userSgprCount = userSgprsCount}; - - std::memcpy(key.userSgprs, userSgprs, - userSgprsCount * sizeof(std::uint32_t)); - - decltype(shaders)::iterator it; - CachedShader *entry; - { - std::lock_guard lock(mtx); - - auto [emplacedIt, inserted] = - shaders.try_emplace(key, std::forward_list{}); - - if (!inserted) { - for (auto &shader : emplacedIt->second) { - bool isAllSame = true; - for (auto &[startAddress, bytes] : shader.cachedData) { - if (std::memcmp(memory.getPointer(startAddress), bytes.data(), - bytes.size()) != 0) { - isAllSame = false; - break; - } - } - - if (isAllSame) { - return shader; - } - } - - std::printf("cache: found shader with different data, recompiling\n"); - } - - it = emplacedIt; - entry = &it->second.emplace_front(); - } - - taskSet.append( - getCpuScheduler(), createCpuTask([=, this](const AsyncTaskCtl &) { - util::MemoryAreaTable<> dependencies; - flockfile(stdout); - auto info = shader::convert( - memory, stage, address, - std::span(userSgprs, userSgprsCount), dimX, - dimY, dimZ, dependencies); - - if (!validateSpirv(info.spirv)) { - printSpirv(info.spirv); - dumpShader(memory.getPointer(address)); - util::unreachable(); - } - - // if (auto opt = optimizeSpirv(info.spirv)) { - // info.spirv = std::move(*opt); - // } - - printSpirv(info.spirv); - funlockfile(stdout); - - for (auto [startAddress, endAddress] : dependencies) { - auto ptr = memory.getPointer(startAddress); - auto &target = entry->cachedData[startAddress]; - target.resize(endAddress - startAddress); - - // std::printf("shader dependency %lx-%lx\n", startAddress, - // endAddress); - std::memcpy(target.data(), ptr, target.size()); - } - - VkShaderCreateInfoEXT createInfo{ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, - .flags = 0, - .stage = shaderStageToVk(stage), - .nextStage = 0, - .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, - .codeSize = info.spirv.size() * sizeof(info.spirv[0]), - .pCode = info.spirv.data(), - .pName = "main", - .setLayoutCount = 1, - .pSetLayouts = &descriptorSetLayout, - }; - - VkShaderEXT shader; - Verify() << _vkCreateShadersEXT(vk::g_vkDevice, 1, &createInfo, - vk::g_vkAllocator, &shader); - entry->info = std::move(info); - entry->shader = shader; - })); - - return *entry; - } - - Ref - getImage(std::uint64_t tag, TaskChain &initTaskChain, std::uint64_t address, - SurfaceFormat dataFormat, TextureChannelType channelType, - TileMode tileMode, std::uint32_t width, std::uint32_t height, - std::uint32_t depth, std::uint32_t pitch, int selX, int selY, - int selZ, int selW, shader::AccessOp access, bool isColor = true, - bool isStorage = false) { - auto &line = getLine(address, pitch * height * depth); - return line.getImage(tag, initTaskChain, address, dataFormat, channelType, - tileMode, width, height, depth, pitch, selX, selY, - selZ, selW, access, isColor, isStorage); - } - - Ref - getBuffer(std::uint64_t tag, TaskChain &initTaskChain, std::uint64_t address, - std::uint32_t elementCount, std::uint32_t stride, - std::uint32_t elementSize, shader::AccessOp access) { - auto &line = getLine(address, stride != 0 ? stride * elementCount - : elementSize * elementCount); - return line.getBuffer(tag, initTaskChain, address, elementCount, stride, - elementSize, access); - } - -private: - CacheLine &getLine(std::uint64_t address, std::size_t size) { - std::lock_guard lock(mtx); - auto it = cacheLines.lower_bound(address); - - if (it == cacheLines.end() || - address >= it->second.areaAddress + it->second.areaSize || - it->second.areaAddress >= address + size) { - auto area = memoryAreaTable[memory.vmId].queryArea(address / kPageSize); - area.beginAddress *= kPageSize; - area.endAddress *= kPageSize; - - assert(address >= area.beginAddress && address + size < area.endAddress); - it = cacheLines.emplace_hint( - it, std::piecewise_construct, std::tuple{area.beginAddress}, - std::tuple{memory, area.beginAddress, area.endAddress}); - } - - return it->second; - } -}; - -static Cache &getCache(RemoteMemory memory) { - static Cache caches[6]{0, 1, 2, 3, 4, 5}; - return caches[memory.vmId]; -} - -static VkShaderEXT getPrimTypeRectGeomShader() { - static VkShaderEXT shader = VK_NULL_HANDLE; - if (shader != VK_NULL_HANDLE) { - return shader; - } - - auto layout = getGraphicsLayout().first; - VkShaderCreateInfoEXT createInfo{ - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, - .flags = 0, - .stage = VK_SHADER_STAGE_GEOMETRY_BIT, - .nextStage = 0, - .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, - .codeSize = sizeof(spirv_rect_list_geom), - .pCode = spirv_rect_list_geom, - .pName = "main", - .setLayoutCount = 1, - .pSetLayouts = &layout, - }; - - Verify() << _vkCreateShadersEXT(vk::g_vkDevice, 1, &createInfo, - vk::g_vkAllocator, &shader); - return shader; -} - -struct GpuActionResources { - std::atomic refs{0}; - RemoteMemory memory; - // GpuTaskHandle taskHandle; - // QueueRegisters ®s; - std::uint64_t tag = getCache(memory).createTag(); - std::vector> usedImages; - std::vector> usedBuffers; - - GpuActionResources(RemoteMemory memory) : memory(memory) {} - - void release() { - for (auto image : usedImages) { - image->unlock(tag); - } - - for (auto buffer : usedBuffers) { - buffer->unlock(tag); - } - } - - void incRef() { refs.fetch_add(1, std::memory_order::relaxed); } - - void decRef() { - if (refs.fetch_sub(1, std::memory_order::relaxed) == 1) { - delete this; - } - } - - void loadShaderBindings(TaskChain &initTaskChain, VkDescriptorSet descSet, - const shader::Shader &shader) { - for (auto &uniform : shader.uniforms) { - switch (uniform.kind) { - case shader::Shader::UniformKind::Buffer: { - auto &vbuffer = *reinterpret_cast(uniform.buffer); - - auto bufferRef = getCache(memory).getBuffer( - tag, initTaskChain, vbuffer.getAddress(), vbuffer.getNumRecords(), - vbuffer.getStride(), vbuffer.getElementSize(), uniform.accessOp); - - VkDescriptorBufferInfo bufferInfo{ - .buffer = bufferRef->buffer.getHandle(), - .offset = vbuffer.getAddress() - bufferRef->bufferAddress, - .range = vbuffer.getSize(), - }; - - VkWriteDescriptorSet writeDescSet{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = descSet, - .dstBinding = uniform.binding, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .pBufferInfo = &bufferInfo, - }; - - usedBuffers.push_back(std::move(bufferRef)); - - vkUpdateDescriptorSets(vk::g_vkDevice, 1, &writeDescSet, 0, nullptr); - break; - } - - case shader::Shader::UniformKind::StorageImage: - case shader::Shader::UniformKind::Image: { - auto &tbuffer = *reinterpret_cast(uniform.buffer); - auto dataFormat = tbuffer.dfmt; - auto channelType = tbuffer.nfmt; - - // assert(tbuffer->width == tbuffer->pitch); - std::size_t width = tbuffer.width + 1; - std::size_t height = tbuffer.height + 1; - std::size_t depth = tbuffer.depth + 1; - std::size_t pitch = tbuffer.pitch + 1; - auto tileMode = (TileMode)tbuffer.tiling_idx; - - // std::printf( - // "image: mtype_L2 = %u, min_lod = %u, dfmt = %u, nfmt = %u, - // mtype01 " - // "= %u, width = %u, height = %u, perfMod = %u, interlaced = %u, " - // "dst_sel_x = %u, dst_sel_y = %u, dst_sel_z = %u, dst_sel_w = %u, - // " "base_level = %u, last_level = %u, tiling_idx = %u, pow2pad = - // %u, " "mtype2 = %u, type = %u, depth = %u, pitch = %u, base_array - // = %u, " "last_array = %u, min_lod_warn = %u, counter_bank_id = - // %u, " "LOD_hdw_cnt_en = %u\n", tbuffer.mtype_L2, tbuffer.min_lod, - // tbuffer.dfmt, tbuffer.nfmt, tbuffer.mtype01, tbuffer.width, - // tbuffer.height, tbuffer.perfMod, tbuffer.interlaced, - // tbuffer.dst_sel_x, tbuffer.dst_sel_y, tbuffer.dst_sel_z, - // tbuffer.dst_sel_w, tbuffer.base_level, tbuffer.last_level, - // tbuffer.tiling_idx, tbuffer.pow2pad, tbuffer.mtype2, - // (unsigned)tbuffer.type, tbuffer.depth, tbuffer.pitch, - // tbuffer.base_array, tbuffer.last_array, tbuffer.min_lod_warn, - // tbuffer.counter_bank_id, tbuffer.LOD_hdw_cnt_en); - - auto image = getCache(memory).getImage( - tag, initTaskChain, tbuffer.getAddress(), dataFormat, channelType, - tileMode, width, height, depth, pitch, tbuffer.dst_sel_x, - tbuffer.dst_sel_y, tbuffer.dst_sel_z, tbuffer.dst_sel_w, - uniform.accessOp, true, - uniform.kind == shader::Shader::UniformKind::StorageImage); - - VkDescriptorImageInfo imageInfo{ - .imageView = image->view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - usedImages.push_back(std::move(image)); - - VkWriteDescriptorSet writeDescSet{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = descSet, - .dstBinding = uniform.binding, - .descriptorCount = 1, - .descriptorType = - uniform.kind == shader::Shader::UniformKind::StorageImage - ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE - : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, - .pImageInfo = &imageInfo, - }; - - vkUpdateDescriptorSets(vk::g_vkDevice, 1, &writeDescSet, 0, nullptr); - break; - } - - case shader::Shader::UniformKind::Sampler: { - auto &ssampler = *reinterpret_cast(uniform.buffer); - auto sampler = getCache(memory).getSampler(ssampler); - - VkDescriptorImageInfo imageInfo{ - .sampler = sampler, - .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - }; - - VkWriteDescriptorSet writeDescSet{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = descSet, - .dstBinding = uniform.binding, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, - .pImageInfo = &imageInfo, - }; - - vkUpdateDescriptorSets(vk::g_vkDevice, 1, &writeDescSet, 0, nullptr); - break; - } - } - } - } -}; - -static void eliminateFastClear(RemoteMemory memory) { - // TODO - // util::unreachable(); -} - -static void resolve(RemoteMemory memory) { - // TODO: when texture cache will be implemented it MSAA should be done by - // GPU - util::unreachable(); - // auto srcBuffer = regs.colorBuffers[0]; - // auto dstBuffer = regs.colorBuffers[1]; - - // const auto src = memory.getPointer(srcBuffer.base); - // auto dst = memory.getPointer(dstBuffer.base); - - // if (src == nullptr || dst == nullptr) { - // return; - // } - - // std::memcpy(dst, src, regs.screenScissorH * regs.screenScissorW * 4); -} - -static void draw(RemoteMemory memory, TaskChain &taskSet, QueueRegisters ®s, - std::uint32_t count, std::uint64_t indeciesAddress, - std::uint32_t indexCount) { - if (regs.cbColorFormat == CbColorFormat::Disable) { - return; - } - - if (regs.cbColorFormat == CbColorFormat::EliminateFastClear) { - eliminateFastClear(memory); - return; - } - - if (regs.cbColorFormat == CbColorFormat::Resolve) { - resolve(memory); - return; - } - - if (regs.pgmVsAddress == 0 || regs.pgmPsAddress == 0) { - return; - } - - if (regs.cbRenderTargetMask == 0 || regs.colorBuffers[0].base == 0) { - return; - } - - auto primType = static_cast(regs.vgtPrimitiveType); - - if (primType == PrimitiveType::kPrimitiveTypeNone) { - return; - } - - regs.depthClearEnable = true; - - auto resources = Ref(new GpuActionResources(memory)); - auto &cache = getCache(memory); - - // std::printf("draw action, tag %lu\n", resources->tag); - - TaskSet shaderLoadTaskSet; - auto [desriptorSetLayout, pipelineLayout] = getGraphicsLayout(); - auto &vertexShader = cache.getShader(shaderLoadTaskSet, desriptorSetLayout, - shader::Stage::Vertex, regs.pgmVsAddress, - regs.userVsData, regs.vsUserSpgrs); - - auto &fragmentShader = cache.getShader( - shaderLoadTaskSet, desriptorSetLayout, shader::Stage::Fragment, - regs.pgmPsAddress, regs.userPsData, regs.psUserSpgrs); - - shaderLoadTaskSet.schedule(); - shaderLoadTaskSet.wait(); - - std::vector colorAttachments; - - std::vector colorBlendEnable; - std::vector colorBlendEquation; - std::vector colorWriteMask; - for (auto targetMask = regs.cbRenderTargetMask; - auto &colorBuffer : regs.colorBuffers) { - if (targetMask == 0 || colorBuffer.base == 0) { - break; - } - - auto mask = targetMask & 0xf; - - if (mask == 0) { - targetMask >>= 4; - continue; - } - targetMask >>= 4; - - shader::AccessOp access = shader::AccessOp::Load | shader::AccessOp::Store; - - auto dataFormat = (SurfaceFormat)colorBuffer.format; - auto channelType = kTextureChannelTypeSrgb; // TODO - - auto colorImage = getCache(memory).getImage( - resources->tag, taskSet, colorBuffer.base, dataFormat, channelType, - (TileMode)colorBuffer.tileModeIndex, - regs.screenScissorW + regs.screenScissorX, - regs.screenScissorH + regs.screenScissorY, 1, - regs.screenScissorW + regs.screenScissorX, 4, 5, 6, 7, access); - - colorAttachments.push_back({ - .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, - .imageView = colorImage->view, - .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - }); - - resources->usedImages.push_back(std::move(colorImage)); - - colorBlendEnable.push_back(regs.blendEnable ? VK_TRUE : VK_FALSE); - colorBlendEquation.push_back(VkColorBlendEquationEXT{ - .srcColorBlendFactor = - blendMultiplierToVkBlendFactor(regs.blendColorSrc), - .dstColorBlendFactor = - blendMultiplierToVkBlendFactor(regs.blendColorDst), - .colorBlendOp = blendFuncToVkBlendOp(regs.blendColorFn), - .srcAlphaBlendFactor = - regs.blendSeparateAlpha - ? blendMultiplierToVkBlendFactor(regs.blendAlphaSrc) - : blendMultiplierToVkBlendFactor(regs.blendColorSrc), - .dstAlphaBlendFactor = - regs.blendSeparateAlpha - ? blendMultiplierToVkBlendFactor(regs.blendAlphaDst) - : blendMultiplierToVkBlendFactor(regs.blendColorDst), - .alphaBlendOp = regs.blendSeparateAlpha - ? blendFuncToVkBlendOp(regs.blendAlphaFn) - : blendFuncToVkBlendOp(regs.blendColorFn), - - }); - - colorWriteMask.push_back(((mask & 1) ? VK_COLOR_COMPONENT_R_BIT : 0) | - ((mask & 2) ? VK_COLOR_COMPONENT_G_BIT : 0) | - ((mask & 4) ? VK_COLOR_COMPONENT_B_BIT : 0) | - ((mask & 8) ? VK_COLOR_COMPONENT_A_BIT : 0)); - } - - auto descSet = cache.getGraphicsDescriptorSet(); - - resources->loadShaderBindings(taskSet, descSet, vertexShader.info); - resources->loadShaderBindings(taskSet, descSet, fragmentShader.info); - - shader::AccessOp depthAccess = shader::AccessOp::None; - - if (!regs.depthClearEnable && regs.zReadBase != 0) { - depthAccess |= shader::AccessOp::Load; - } - - if (regs.depthWriteEnable && regs.zWriteBase != 0) { - depthAccess |= shader::AccessOp::Store; - } - - if (regs.zReadBase != regs.zWriteBase && regs.zWriteBase) { - util::unreachable("zWriteBase = %zx, zReadBase = %zx", regs.zWriteBase, - regs.zReadBase); - } - - Ref depthImage; - VkRenderingAttachmentInfo depthAttachment; - - if (regs.depthEnable) { - depthImage = cache.getImage(resources->tag, taskSet, regs.zReadBase, - kSurfaceFormat24_8, kTextureChannelTypeUNorm, - kTileModeDisplay_LinearAligned, - regs.screenScissorW + regs.screenScissorX, - regs.screenScissorH + regs.screenScissorY, 1, - regs.screenScissorW + regs.screenScissorX, 0, 0, - 0, 0, depthAccess, false); - - depthAttachment = { - .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, - .imageView = depthImage->view, - .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, - .loadOp = !regs.depthClearEnable && regs.zReadBase - ? VK_ATTACHMENT_LOAD_OP_LOAD - : VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = regs.depthWriteEnable && regs.zWriteBase - ? VK_ATTACHMENT_STORE_OP_STORE - : VK_ATTACHMENT_STORE_OP_DONT_CARE, - .clearValue = {.depthStencil = {.depth = regs.depthClear}}, - }; - - resources->usedImages.push_back(depthImage); - } - - vk::Buffer indexBufferStorage; - BufferRef indexBuffer; - auto needConversion = isPrimRequiresConversion(primType); - VkIndexType vkIndexType = (regs.indexType & 0x1f) == 0 ? VK_INDEX_TYPE_UINT16 - : VK_INDEX_TYPE_UINT32; - - if (needConversion) { - auto indecies = memory.getPointer(indeciesAddress); - if (indecies == nullptr) { - indexCount = count; - } - - unsigned origIndexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32; - auto converterFn = getPrimConverterFn(primType, &indexCount); - - if (indecies == nullptr) { - if (indexCount < 0x10000) { - vkIndexType = VK_INDEX_TYPE_UINT16; - } else if (indecies) { - vkIndexType = VK_INDEX_TYPE_UINT32; - } - } - - unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 16 : 32; - auto indexBufferSize = indexSize * indexCount; - - indexBufferStorage = vk::Buffer::Allocate( - getHostVisibleMemory(), indexBufferSize, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT); - - void *data = indexBufferStorage.getData(); - - if (indecies == nullptr) { - if (indexSize == 16) { - for (std::uint32_t i = 0; i < indexCount; ++i) { - auto [dstIndex, srcIndex] = converterFn(i); - ((std::uint16_t *)data)[dstIndex] = srcIndex; - } - } else { - for (std::uint32_t i = 0; i < indexCount; ++i) { - auto [dstIndex, srcIndex] = converterFn(i); - ((std::uint32_t *)data)[dstIndex] = srcIndex; - } - } - } else { - if (indexSize == 16) { - for (std::uint32_t i = 0; i < indexCount; ++i) { - auto [dstIndex, srcIndex] = converterFn(i); - std::uint32_t origIndex = origIndexSize == 16 - ? ((std::uint16_t *)indecies)[srcIndex] - : ((std::uint32_t *)indecies)[srcIndex]; - ((std::uint16_t *)data)[dstIndex] = origIndex; - } - - } else { - for (std::uint32_t i = 0; i < indexCount; ++i) { - auto [dstIndex, srcIndex] = converterFn(i); - std::uint32_t origIndex = origIndexSize == 16 - ? ((std::uint16_t *)indecies)[srcIndex] - : ((std::uint32_t *)indecies)[srcIndex]; - ((std::uint32_t *)data)[dstIndex] = origIndex; - } - } - } - - indexBuffer = {indexBufferStorage.getHandle(), 0, indexBufferSize}; - } else if (indeciesAddress != 0) { - unsigned indexSize = vkIndexType == VK_INDEX_TYPE_UINT16 ? 2 : 4; - - auto bufferRef = - cache.getBuffer(resources->tag, taskSet, indeciesAddress, indexCount, 0, - indexSize, shader::AccessOp::Load); - indexBuffer = { - .buffer = bufferRef->buffer.getHandle(), - .offset = indeciesAddress - bufferRef->bufferAddress, - .size = static_cast(indexCount) * indexSize, - }; - - resources->usedBuffers.push_back(std::move(bufferRef)); - } - - auto drawTaskFn = [colorAttachments = std::move(colorAttachments), - colorBlendEnable = std::move(colorBlendEnable), - colorBlendEquation = std::move(colorBlendEquation), - pipelineLayout, colorWriteMask = std::move(colorWriteMask), - vertexShader = vertexShader.shader, - fragmentShader = fragmentShader.shader, depthAttachment, - loadTaskSet = std::move(shaderLoadTaskSet), primType, - vkIndexType, indexBuffer, count, indexCount, descSet, - ®s](VkCommandBuffer drawCommandBuffer) { - VkRenderingInfo renderInfo{ - .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, - .renderArea = - { - .offset = {.x = static_cast(regs.screenScissorX), - .y = static_cast(regs.screenScissorY)}, - .extent = - { - .width = regs.screenScissorW, - .height = regs.screenScissorH, - }, - }, - .layerCount = 1, - .colorAttachmentCount = static_cast(colorAttachments.size()), - .pColorAttachments = colorAttachments.data(), - .pDepthAttachment = regs.depthEnable ? &depthAttachment : nullptr, - .pStencilAttachment = regs.depthEnable ? &depthAttachment : nullptr, - }; - - vkCmdBeginRendering(drawCommandBuffer, &renderInfo); - - // std::printf("viewport: %ux%u, %ux%u\n", regs.screenScissorX, - // regs.screenScissorY, regs.screenScissorW, - // regs.screenScissorH); - VkViewport viewport{}; - viewport.x = regs.screenScissorX; - viewport.y = (float)regs.screenScissorH - regs.screenScissorY; - viewport.width = regs.screenScissorW; - viewport.height = -(float)regs.screenScissorH; - viewport.minDepth = -1.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(drawCommandBuffer, 0, 1, &viewport); - - VkRect2D scissor{}; - scissor.extent.width = regs.screenScissorW; - scissor.extent.height = regs.screenScissorH; - scissor.offset.x = regs.screenScissorX; - scissor.offset.y = regs.screenScissorY; - vkCmdSetScissor(drawCommandBuffer, 0, 1, &scissor); - - _vkCmdSetColorBlendEnableEXT(drawCommandBuffer, 0, colorBlendEnable.size(), - colorBlendEnable.data()); - _vkCmdSetColorBlendEquationEXT(drawCommandBuffer, 0, - colorBlendEquation.size(), - colorBlendEquation.data()); - - _vkCmdSetDepthClampEnableEXT(drawCommandBuffer, VK_TRUE); - vkCmdSetDepthCompareOp(drawCommandBuffer, (VkCompareOp)regs.zFunc); - vkCmdSetDepthTestEnable(drawCommandBuffer, - regs.depthEnable ? VK_TRUE : VK_FALSE); - vkCmdSetDepthWriteEnable(drawCommandBuffer, - regs.depthWriteEnable ? VK_TRUE : VK_FALSE); - vkCmdSetDepthBounds(drawCommandBuffer, -1.f, 1.f); - vkCmdSetDepthBoundsTestEnable(drawCommandBuffer, - regs.depthBoundsEnable ? VK_TRUE : VK_FALSE); - vkCmdSetStencilOp(drawCommandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, - VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS); - - // VkDeviceSize strides = 0; - // vkCmdBindVertexBuffers2EXT(drawCommandBuffer, 0, 0, nullptr, nullptr, - // nullptr, &strides); - vkCmdSetRasterizerDiscardEnable(drawCommandBuffer, VK_FALSE); - vkCmdSetDepthBiasEnable(drawCommandBuffer, VK_TRUE); - vkCmdSetDepthBias(drawCommandBuffer, 0, 1, 1); - vkCmdSetPrimitiveRestartEnable(drawCommandBuffer, VK_FALSE); - - _vkCmdSetLogicOpEnableEXT(drawCommandBuffer, VK_FALSE); - _vkCmdSetLogicOpEXT(drawCommandBuffer, VK_LOGIC_OP_AND); - _vkCmdSetPolygonModeEXT(drawCommandBuffer, VK_POLYGON_MODE_FILL); - _vkCmdSetRasterizationSamplesEXT(drawCommandBuffer, VK_SAMPLE_COUNT_1_BIT); - VkSampleMask sampleMask = ~0; - _vkCmdSetSampleMaskEXT(drawCommandBuffer, VK_SAMPLE_COUNT_1_BIT, - &sampleMask); - _vkCmdSetTessellationDomainOriginEXT( - drawCommandBuffer, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT); - _vkCmdSetAlphaToCoverageEnableEXT(drawCommandBuffer, VK_FALSE); - _vkCmdSetVertexInputEXT(drawCommandBuffer, 0, nullptr, 0, nullptr); - _vkCmdSetColorWriteMaskEXT(drawCommandBuffer, 0, colorWriteMask.size(), - colorWriteMask.data()); - - vkCmdSetStencilCompareMask(drawCommandBuffer, - VK_STENCIL_FACE_FRONT_AND_BACK, 0); - vkCmdSetStencilWriteMask(drawCommandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, - 0); - vkCmdSetStencilReference(drawCommandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, - 0); - - vkCmdSetCullMode( - drawCommandBuffer, - (regs.cullBack ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE) | - (regs.cullFront ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_NONE)); - vkCmdSetFrontFace(drawCommandBuffer, regs.face - ? VK_FRONT_FACE_CLOCKWISE - : VK_FRONT_FACE_COUNTER_CLOCKWISE); - - vkCmdSetPrimitiveTopology(drawCommandBuffer, getVkPrimitiveType(primType)); - vkCmdSetStencilTestEnable(drawCommandBuffer, VK_FALSE); - - vkCmdBindDescriptorSets(drawCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineLayout, 0, 1, &descSet, 0, nullptr); - - VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_VERTEX_BIT, - VK_SHADER_STAGE_GEOMETRY_BIT, - VK_SHADER_STAGE_FRAGMENT_BIT}; - VkShaderEXT shaders[]{vertexShader, VK_NULL_HANDLE, fragmentShader}; - - if (primType == kPrimitiveTypeRectList) { - shaders[1] = getPrimTypeRectGeomShader(); - } - _vkCmdBindShadersEXT(drawCommandBuffer, std::size(stages), stages, shaders); - - if (indexBuffer.buffer == nullptr) { - vkCmdDraw(drawCommandBuffer, count, 1, 0, 0); - } else { - vkCmdBindIndexBuffer(drawCommandBuffer, indexBuffer.buffer, - indexBuffer.offset, vkIndexType); - vkCmdDrawIndexed(drawCommandBuffer, indexCount, 1, 0, 0, 0); - } - - vkCmdEndRendering(drawCommandBuffer); - }; - - auto drawTaskId = taskSet.add(ProcessQueue::Graphics, taskSet.getLastTaskId(), - std::move(drawTaskFn)); - - taskSet.add(drawTaskId, [=] { - // std::printf("releasing draw action, tag %lu\n", resources->tag); - getCache(memory).releaseGraphicsDescriptorSet(descSet); - resources->release(); - }); - - taskSet.wait(); -} - -static void dispatch(RemoteMemory memory, TaskChain &taskSet, - QueueRegisters ®s, std::size_t dimX, std::size_t dimY, - std::size_t dimZ) { - if (regs.pgmComputeAddress == 0) { - std::fprintf(stderr, "attempt to invoke dispatch without compute shader\n"); - return; - } - - auto resources = Ref(new GpuActionResources(memory)); - auto &cache = getCache(memory); - auto descSet = cache.getComputeDescriptorSet(); - - // std::printf("dispatch action, tag %lu\n", resources->tag); - - auto [desriptorSetLayout, pipelineLayout] = getComputeLayout(); - - TaskSet loadShaderTaskSet; - - auto &computeShader = cache.getShader( - loadShaderTaskSet, desriptorSetLayout, shader::Stage::Compute, - regs.pgmComputeAddress, regs.userComputeData, regs.computeUserSpgrs, - regs.computeNumThreadX, regs.computeNumThreadY, regs.computeNumThreadZ); - - loadShaderTaskSet.schedule(); - loadShaderTaskSet.wait(); - - resources->loadShaderBindings(taskSet, descSet, computeShader.info); - - auto dispatchTaskFn = - [=, shader = computeShader.shader](VkCommandBuffer commandBuffer) { - VkShaderStageFlagBits stages[]{VK_SHADER_STAGE_COMPUTE_BIT}; - _vkCmdBindShadersEXT(commandBuffer, 1, stages, &shader); - vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, - pipelineLayout, 0, 1, &descSet, 0, nullptr); - - vkCmdDispatch(commandBuffer, dimX, dimY, dimZ); - }; - - auto computeTaskId = - taskSet.add(ProcessQueue::Compute, taskSet.getLastTaskId(), - std::move(dispatchTaskFn)); - - taskSet.add(computeTaskId, [=] { - // std::printf("releasing dispatch action, tag %lu\n", resources->tag); - getCache(memory).releaseComputeDescriptorSet(descSet); - resources->release(); - }); -} - -enum class EventWriteSource : std::uint8_t { - Immediate32 = 0x1, - Immediate64 = 0x2, - GlobalClockCounter = 0x3, - GpuCoreClockCounter = 0x4, -}; - -struct EopData { - std::uint32_t eventType; - std::uint32_t eventIndex; - std::uint64_t address; - std::uint64_t value; - std::uint8_t dstSel; - std::uint8_t intSel; - EventWriteSource eventSource; -}; - -static std::uint64_t globalClock() { - // TODO - return 0x0; -} - -static std::uint64_t gpuCoreClock() { - // TODO - return 0x0; -} - -static void writeEop(RemoteMemory memory, EopData data) { - // std::printf("write eop: dstSel=%x, intSel=%x,eventIndex=%x, address = - // %#lx, - // " - // "value = %#lx, %x\n", - // data.dstSel, data.intSel, data.eventIndex, data.address, - // data.value, (unsigned)data.eventSource); - switch (data.eventSource) { - case EventWriteSource::Immediate32: { - *memory.getPointer(data.address) = data.value; - break; - } - case EventWriteSource::Immediate64: { - *memory.getPointer(data.address) = data.value; - break; - } - case EventWriteSource::GlobalClockCounter: { - *memory.getPointer(data.address) = globalClock(); - break; - } - case EventWriteSource::GpuCoreClockCounter: { - *memory.getPointer(data.address) = gpuCoreClock(); - break; - } - } -} - -static void drawIndexAuto(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, std::uint32_t count) { - draw(memory, waitTaskSet, regs, count, 0, 0); -} - -static void drawIndex2(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, std::uint32_t maxSize, - std::uint64_t address, std::uint32_t count) { - draw(memory, waitTaskSet, regs, count, address, maxSize); -} - -struct Queue { - Scheduler sched{1}; - QueueRegisters regs; - std::mutex mtx; - - struct CommandBuffer { - std::span commands; - }; - - std::deque commandBuffers; -}; - -static void handleCommandBuffer(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span &packets); -static void handleLoadConstRam(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - std::uint64_t addressLo = packet[1]; - std::uint64_t addressHi = packet[2]; - std::uint32_t numDw = getBits(packet[3], 14, 0); - std::uint32_t offset = getBits(packet[4], 15, 0); - auto address = addressLo | (addressHi << 32); -} - -static void handleSET_UCONFIG_REG(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - - std::uint32_t regId = 0xc000 + packet[1]; - - for (auto value : packet.subspan(2)) { - regs.setRegister(regId++, value); - } -} - -static void handleSET_CONTEXT_REG(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - std::uint32_t regId = 0xa000 + packet[1]; - - for (auto value : packet.subspan(2)) { - regs.setRegister(regId++, value); - } -} - -static void handleSET_SH_REG(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - - std::uint32_t regId = 0x2c00 + packet[1]; - - for (auto value : packet.subspan(2)) { - regs.setRegister(regId++, value); - } -} - -static void handleDMA_DATA(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto srcAddrLo = packet[2]; - auto srcAddrHi = packet[3]; - auto dstAddrLo = packet[4]; - auto dstAddrHi = packet[5]; - - auto srcAddr = srcAddrLo | (static_cast(srcAddrHi) << 32); - auto dstAddr = dstAddrLo | (static_cast(dstAddrHi) << 32); - - // std::printf("dma data: src address %lx, dst address %lx\n", srcAddr, - // dstAddr); -} - -static void handleAQUIRE_MEM(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - // std::printf("aquire mem\n"); -} - -static void handleWRITE_DATA(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto control = packet[1]; - auto destAddrLo = packet[2]; - auto destAddrHi = packet[3]; - auto data = packet.subspan(4); - auto size = data.size(); - - // 0 - Micro Engine - ME - // 1 - Prefetch parser - PFP - // 2 - Constant engine - CE - // 3 - Dispatch engine - DE - auto engineSel = getBits(control, 31, 30); - - // wait for confirmation that write complete - auto wrConfirm = getBit(control, 20); - - // do not increment address - auto wrOneAddr = getBit(control, 16); - - // 0 - mem-mapped register - // 1 - memory sync - // 2 - tc/l2 - // 3 - gds - // 4 - reserved - // 5 - memory async - auto dstSel = getBits(control, 11, 8); - - auto memMappedRegisterAddress = getBits(destAddrLo, 15, 0); - auto memory32bit = getBits(destAddrLo, 31, 2); - auto memory64bit = getBits(destAddrLo, 31, 3); - auto gdsOffset = getBits(destAddrLo, 15, 0); - - auto address = destAddrLo | (static_cast(destAddrHi) << 32); - auto dest = memory.getPointer(address); - // std::printf("write data: address=%lx\n", address); - for (unsigned i = 0; i < size; ++i) { - dest[i] = data[i]; - } -} - -static void handleINDEX_TYPE(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - regs.indexType = packet[1]; -} - -static void handleINDEX_BASE(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - // std::printf("INDEX_BASE:\n"); - // for (auto cmd : packet) { - // std::printf(" %x\n", cmd); - // } - - std::uint64_t addressLo = packet[1] << 1; - std::uint64_t addressHi = getBits(packet[2], 15, 0); - - regs.indexBase = (addressHi << 32) | addressLo; -} - -static void handleDRAW_INDEX_AUTO(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - drawIndexAuto(memory, waitTaskSet, regs, packet[1]); -} - -static void handleDRAW_INDEX_OFFSET_2(RemoteMemory memory, - TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto maxSize = packet[1]; - auto offset = packet[2]; - auto count = packet[3]; - auto drawInitiator = packet[4]; - - drawIndex2(memory, waitTaskSet, regs, maxSize, regs.indexBase + offset, - count); -} - -static void handleDRAW_INDEX_2(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto maxSize = packet[1]; - auto address = packet[2] | (static_cast(packet[3]) << 32); - auto count = packet[4]; - - drawIndex2(memory, waitTaskSet, regs, maxSize, address, count); -} - -static void handleDISPATCH_DIRECT(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto dimX = packet[1]; - auto dimY = packet[2]; - auto dimZ = packet[3]; - - dispatch(memory, waitTaskSet, regs, dimX, dimY, dimZ); -} - -static void handleCONTEXT_CONTROL(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - // std::printf("context control\n"); -} - -static void handleCLEAR_STATE(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - // std::printf("clear state\n"); -} - -static void handleRELEASE_MEM(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto writeSource = static_cast(getBits(packet[2], 32, 29)); - auto addressLo = packet[3]; - auto addressHi = packet[4]; - auto dataLo = packet[5]; - auto dataHi = packet[6]; - - auto address = addressLo | (static_cast(addressHi) << 32); - auto data = dataLo | (static_cast(dataHi) << 32); - - // std::printf("release memory: address %lx, data %lx, source %x\n", - // address, - // data, (unsigned)writeSource); - - switch (writeSource) { - case EventWriteSource::Immediate32: { - *memory.getPointer(address) = data; - break; - } - case EventWriteSource::Immediate64: { - *memory.getPointer(address) = data; - break; - } - case EventWriteSource::GlobalClockCounter: { - *memory.getPointer(address) = globalClock(); - break; - } - case EventWriteSource::GpuCoreClockCounter: { - *memory.getPointer(address) = gpuCoreClock(); - break; - } - } -} - -static void handleEVENT_WRITE(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - // std::printf("event write\n"); -} - -static void handleINDIRECT_BUFFER_3F(RemoteMemory memory, - TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto swapFn = getBits(packet[1], 1, 0); - auto addressLo = getBits(packet[1], 31, 2) << 2; - auto addressHi = packet[2]; - auto count = getBits(packet[3], 19, 0); - auto vmid = getBits(packet[3], 31, 24); - auto address = addressLo | (static_cast(addressHi) << 32); - std::printf("indirect buffer: address=%lx, size = %x, vmid=%x\n", address, - count, vmid); - - auto commands = std::span(memory.getPointer(address), count); - - waitTaskSet.add([=, waitTaskSet = TaskChain::Create()] mutable { - while (!commands.empty()) { - handleCommandBuffer(memory, *waitTaskSet.get(), regs, commands); - waitTaskSet->wait(); - } - std::printf("indirect buffer end\n"); - std::fflush(stdout); - }); -} - -static void handleEVENT_WRITE_EOP(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - EopData eopData{}; - eopData.eventType = getBits(packet[1], 6, 0); - eopData.eventIndex = getBits(packet[1], 12, 8); - eopData.address = - packet[2] | (static_cast(getBits(packet[3], 16, 0)) << 32); - eopData.value = packet[4] | (static_cast(packet[5]) << 32); - eopData.dstSel = getBit(packet[3], 16); - eopData.intSel = getBits(packet[3], 26, 24); - eopData.eventSource = - static_cast(getBits(packet[3], 32, 29)); - writeEop(memory, eopData); -} - -static void handleEVENT_WRITE_EOS(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - std::uint32_t eventType = getBits(packet[1], 6, 0); - std::uint32_t eventIndex = getBits(packet[1], 12, 8); - std::uint64_t address = - packet[2] | (static_cast(getBits(packet[3], 16, 0)) << 32); - std::uint32_t command = getBits(packet[3], 32, 16); - // std::printf("write eos: eventType=%x, eventIndex=%x, " - // "address = %#lx, command = %#x\n", - // eventType, eventIndex, address, command); - if (command == 0x4000) { // store 32bit data - *memory.getPointer(address) = packet[4]; - } else { - util::unreachable(); - } -} - -static void handleWAIT_REG_MEM(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto function = packet[1] & 7; - auto pollAddressLo = packet[2]; - auto pollAddressHi = packet[3]; - auto reference = packet[4]; - auto mask = packet[5]; - auto pollInterval = packet[6]; - - auto pollAddress = - pollAddressLo | (static_cast(pollAddressHi) << 32); - auto pointer = memory.getPointer(pollAddress); - - auto compare = [&](std::uint32_t value, std::uint32_t reference, - int function) { - switch (function) { - case 0: - return true; - case 1: - return value < reference; - case 2: - return value <= reference; - case 3: - return value == reference; - case 4: - return value != reference; - case 5: - return value >= reference; - case 6: - return value > reference; - } - - util::unreachable(); - }; - - // std::printf(" polling address %lx, reference = %x, mask = %x, " - // "function = %u, " - // "interval = %x, value = %x\n", - // pollAddress, reference, mask, function, pollInterval, - // *pointer & mask); - // std::fflush(stdout); - - reference &= mask; - - waitTaskSet.add([=] { - while (true) { - auto value = *pointer & mask; - if (compare(value, reference, function)) { - return; - } - } - }); -} - -static void handleNOP(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, std::span packet) {} - -static void handleUnknownCommand(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet) { - auto op = getBits(packet[0], 15, 8); - auto len = getBits(packet[0], 29, 16) + 1; - // std::printf("unimplemented packet: op=%s, len=%x\n", - // opcodeToString(op).c_str(), len); -} - -using CommandHandler = void (*)(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span packet); -static auto g_commandHandlers = [] { - std::array handlers; - handlers.fill(handleUnknownCommand); - handlers[kOpcodeNOP] = handleNOP; - - handlers[kOpcodeCLEAR_STATE] = handleCLEAR_STATE; - handlers[kOpcodeDISPATCH_DIRECT] = handleDISPATCH_DIRECT; - handlers[kOpcodeINDEX_BASE] = handleINDEX_BASE; - handlers[kOpcodeDRAW_INDEX_2] = handleDRAW_INDEX_2; - handlers[kOpcodeCONTEXT_CONTROL] = handleCONTEXT_CONTROL; - handlers[kOpcodeINDEX_TYPE] = handleINDEX_TYPE; - handlers[kOpcodeDRAW_INDEX_AUTO] = handleDRAW_INDEX_AUTO; - handlers[kOpcodeDRAW_INDEX_OFFSET_2] = handleDRAW_INDEX_OFFSET_2; - - handlers[kOpcodeWRITE_DATA] = handleWRITE_DATA; - handlers[kOpcodeWAIT_REG_MEM] = handleWAIT_REG_MEM; - handlers[kOpcodeINDIRECT_BUFFER_3F] = handleINDIRECT_BUFFER_3F; - - handlers[kOpcodeEVENT_WRITE] = handleEVENT_WRITE; - handlers[kOpcodeEVENT_WRITE_EOP] = handleEVENT_WRITE_EOP; - handlers[kOpcodeEVENT_WRITE_EOS] = handleEVENT_WRITE_EOS; - handlers[kOpcodeRELEASE_MEM] = handleRELEASE_MEM; - handlers[kOpcodeDMA_DATA] = handleDMA_DATA; - handlers[kOpcodeACQUIRE_MEM] = handleAQUIRE_MEM; - - handlers[kOpcodeSET_CONTEXT_REG] = handleSET_CONTEXT_REG; - handlers[kOpcodeSET_SH_REG] = handleSET_SH_REG; - handlers[kOpcodeSET_UCONFIG_REG] = handleSET_UCONFIG_REG; - - handlers[kOpcodeLOAD_CONST_RAM] = handleLoadConstRam; - return handlers; -}(); - -static void handleCommandBuffer(RemoteMemory memory, TaskChain &waitTaskSet, - QueueRegisters ®s, - std::span &packets) { - while (!packets.empty()) { - // std::uint64_t address = - // (char *)packets.data() - memory.shmPointer + 0x40000; - // std::fprintf(stderr, "address = %lx\n", address); - auto cmd = packets[0]; - auto type = getBits(cmd, 31, 30); - // std::printf("cmd: %x, %u\n", cmd, type); - - if (type == 3) { - // auto predicate = getBit(cmd, 0); - // auto shaderType = getBit(cmd, 1); - auto op = getBits(cmd, 15, 8); - auto len = getBits(cmd, 29, 16) + 2; - // std::printf("cmd: %s:%x, %x, %x\n", opcodeToString(op).c_str(), len, - // predicate, shaderType); - - g_commandHandlers[op](memory, waitTaskSet, regs, packets.subspan(0, len)); - packets = packets.subspan(len); - - if (!waitTaskSet.empty()) { - return; - } - - continue; - } - - if (type == 0) { - std::printf("!packet type 0!\n"); - auto baseIndex = getBits(cmd, 15, 0); - auto count = getBits(cmd, 29, 16); - std::printf("-- baseIndex=%x, count=%d\n", baseIndex, count); - packets = {}; // HACK - packets = packets.subspan(count); - continue; - } - - if (type == 2) { - // std::printf("!packet type 2!\n"); - packets = packets.subspan(1); - } - - if (type == 1) { - util::unreachable("Unexpected packet type 1!\n"); - } - } -} - -void amdgpu::device::AmdgpuDevice::handleProtectMemory(RemoteMemory memory, - std::uint64_t address, - std::uint64_t size, - std::uint32_t prot) { - auto beginPage = address / kPageSize; - auto endPage = (address + size + kPageSize - 1) / kPageSize; - - if (prot >> 4) { - memoryAreaTable[memory.vmId].map(beginPage, endPage); - const char *protStr; - switch (prot >> 4) { - case PROT_READ: - protStr = "R"; - break; - - case PROT_WRITE: - protStr = "W"; - break; - - case PROT_WRITE | PROT_READ: - protStr = "RW"; - break; - - default: - protStr = "unknown"; - break; - } - std::fprintf(stderr, "Allocated area at %zx, size %lx, prot %s, vmid %u\n", - address, size, protStr, memory.vmId); - } else { - memoryAreaTable[memory.vmId].unmap(beginPage, endPage); - std::fprintf(stderr, "Unmapped area at %zx, size %lx\n", address, size); - } -} - -static std::map queues; - -void amdgpu::device::AmdgpuDevice::handleCommandBuffer(RemoteMemory memory, - std::uint64_t queueId, - std::uint64_t address, - std::uint64_t size) { - auto count = size / sizeof(std::uint32_t); - - if (queueId == 0xc0023300) { - queueId = 0xc0023f00; - } - - auto [it, inserted] = queues.try_emplace(queueId); - - if (inserted) { - std::printf("creation queue %lx\n", queueId); - it->second.sched.enqueue([=, queue = &it->second, - initialized = false] mutable { - if (!initialized) { - initialized = true; - - if (queueId == 0xc0023f00) { - setThreadName("Graphics queue"); - } else { - setThreadName(("Compute queue" + std::to_string(queueId)).c_str()); - } - } - - Queue::CommandBuffer *commandBuffer; - { - std::lock_guard lock(queue->mtx); - if (queue->commandBuffers.empty()) { - return TaskResult::Reschedule; - } - commandBuffer = &queue->commandBuffers.front(); - } - - if (commandBuffer->commands.empty()) { - std::lock_guard lock(queue->mtx); - queue->commandBuffers.pop_front(); - return TaskResult::Reschedule; - } - - auto taskChain = TaskChain::Create(); - ::handleCommandBuffer(memory, *taskChain.get(), queue->regs, - commandBuffer->commands); - taskChain->wait(); - return TaskResult::Reschedule; - }); - } - - // std::fprintf(stderr, "address = %lx, count = %lx\n", address, count); - - std::lock_guard lock(it->second.mtx); - it->second.commandBuffers.push_back( - {.commands = - std::span(memory.getPointer(address), count)}); -} - -bool amdgpu::device::AmdgpuDevice::handleFlip( - RemoteMemory memory, VkQueue queue, VkCommandBuffer cmdBuffer, - TaskChain &taskChain, std::uint32_t bufferIndex, std::uint64_t arg, - VkImage targetImage, VkExtent2D targetExtent, VkSemaphore waitSemaphore, - VkSemaphore signalSemaphore, VkFence fence, bridge::CmdBuffer *buffers, - bridge::CmdBufferAttribute *bufferAttributes) { - - if (bufferIndex == ~static_cast(0)) { - g_bridge->flipBuffer[memory.vmId] = bufferIndex; - g_bridge->flipArg[memory.vmId] = arg; - g_bridge->flipCount[memory.vmId] = g_bridge->flipCount[memory.vmId] + 1; - - // black surface, ignore for now - return false; - } - - // std::fprintf(stderr, "device local memory: "); - // getDeviceLocalMemory().dump(); - // std::fprintf(stderr, "host visible memory: "); - // getHostVisibleMemory().dump(); - - auto buffer = buffers[bufferIndex]; - auto bufferAttr = bufferAttributes[buffer.attrId]; - - if (bufferAttr.pitch == 0 || bufferAttr.height == 0 || buffer.address == 0) { - std::printf("Attempt to flip unallocated buffer\n"); - return false; - } - - // std::fprintf(stderr, - // "flip: address=%lx, buffer=%ux%u, target=%ux%u, format = - // %x\n - // ", buffer.address, buffer.width, buffer.height, - // targetExtent.width, targetExtent.height, - // buffer.pixelFormat); - - TaskSet readTask; - TaskSet writeTask; - Ref imageRef; - - SurfaceFormat surfFormat; - TextureChannelType channelType; - - switch (bufferAttr.pixelFormat) { - case 0x80000000: - // bgra - surfFormat = kSurfaceFormat8_8_8_8; - channelType = kTextureChannelTypeSrgb; - break; - - case 0x80002200: - // rgba - surfFormat = kSurfaceFormat8_8_8_8; - channelType = kTextureChannelTypeSrgb; - break; - - case 0x88060000: - // bgra - surfFormat = kSurfaceFormat2_10_10_10; - channelType = kTextureChannelTypeSrgb; - break; - - default: - util::unreachable("unimplemented color buffer format %x", - bufferAttr.pixelFormat); - } - - auto &cache = getCache(memory); - auto tag = cache.createTag(); - - imageRef = cache.getImage( - tag, taskChain, buffer.address, surfFormat, channelType, - bufferAttr.tilingMode == 1 ? kTileModeDisplay_2dThin - : kTileModeDisplay_LinearAligned, - bufferAttr.width, bufferAttr.height, 1, bufferAttr.pitch, 4, 5, 6, 7, - shader::AccessOp::Load); - - auto initTask = taskChain.getLastTaskId(); - - auto presentTaskFn = [=](VkCommandBuffer cmdBuffer) { - transitionImageLayout(cmdBuffer, targetImage, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - VkImageBlit region{ - .srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffsets = {{}, - {static_cast(bufferAttr.width), - static_cast(bufferAttr.height), 1}}, - .dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffsets = {{}, - {static_cast(targetExtent.width), - static_cast(targetExtent.height), 1}}, - }; - - vkCmdBlitImage(cmdBuffer, imageRef->image.getHandle(), - VK_IMAGE_LAYOUT_GENERAL, targetImage, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion, - VK_FILTER_LINEAR); - - transitionImageLayout(cmdBuffer, targetImage, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); - }; - - auto submitCompleteTask = taskChain.createExternalTask(); - - auto submit = [=, &taskChain](VkQueue queue, VkCommandBuffer cmdBuffer) { - VkSemaphoreSubmitInfo signalSemSubmitInfos[] = { - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = signalSemaphore, - .value = 1, - .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, - }, - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = taskChain.semaphore.getHandle(), - .value = submitCompleteTask, - .stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, - }, - }; - - VkSemaphoreSubmitInfo waitSemSubmitInfos[] = { - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = waitSemaphore, - .value = 1, - .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - }, - { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = taskChain.semaphore.getHandle(), - .value = initTask, - .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - }, - }; - - VkCommandBufferSubmitInfo cmdBufferSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = cmdBuffer, - }; - - VkSubmitInfo2 submitInfo{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .waitSemaphoreInfoCount = static_cast(initTask ? 2 : 1), - .pWaitSemaphoreInfos = waitSemSubmitInfos, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &cmdBufferSubmitInfo, - .signalSemaphoreInfoCount = 2, - .pSignalSemaphoreInfos = signalSemSubmitInfos, - }; - - // vkQueueWaitIdle(queue); - Verify() << vkQueueSubmit2(queue, 1, &submitInfo, fence); - - // if (initTaskChain.semaphore.wait( - // submitCompleteTask, - // std::chrono::duration_cast( - // std::chrono::seconds(10)) - // .count())) { - // util::unreachable("gpu operation takes too long time. wait id = %lu\n", - // initTask); - // } - }; - - getGraphicsQueueScheduler().enqueue({ - .chain = Ref(&taskChain), - .waitId = initTask, - .invoke = std::move(presentTaskFn), - .submit = std::move(submit), - }); - - taskChain.add(submitCompleteTask, [=] { - imageRef->unlock(tag); - - g_bridge->flipBuffer[memory.vmId] = bufferIndex; - g_bridge->flipArg[memory.vmId] = arg; - g_bridge->flipCount[memory.vmId] = g_bridge->flipCount[memory.vmId] + 1; - auto bufferInUse = memory.getPointer( - g_bridge->bufferInUseAddress[memory.vmId]); - if (bufferInUse != nullptr) { - bufferInUse[bufferIndex] = 0; - } - }); - - taskChain.wait(); - - return true; -} - -AmdgpuDevice::AmdgpuDevice(amdgpu::bridge::BridgeHeader *bridge) { - g_bridge = bridge; -} - -AmdgpuDevice::~AmdgpuDevice() { - for (int vmid = 0; vmid < 6; ++vmid) { - getCache(RemoteMemory{vmid}).clear(); - } - - auto [gSetLayout, gPipelineLayout] = getGraphicsLayout(); - auto [cSetLayout, cPipelineLayout] = getComputeLayout(); - - vkDestroyDescriptorSetLayout(vk::g_vkDevice, gSetLayout, vk::g_vkAllocator); - vkDestroyDescriptorSetLayout(vk::g_vkDevice, cSetLayout, vk::g_vkAllocator); - - vkDestroyPipelineLayout(vk::g_vkDevice, gPipelineLayout, vk::g_vkAllocator); - vkDestroyPipelineLayout(vk::g_vkDevice, cPipelineLayout, vk::g_vkAllocator); -} diff --git a/hw/amdgpu/device/src/rect_list.geom.glsl b/hw/amdgpu/device/src/rect_list.geom.glsl deleted file mode 100644 index 287c864..0000000 --- a/hw/amdgpu/device/src/rect_list.geom.glsl +++ /dev/null @@ -1,40 +0,0 @@ -#version 450 - -layout (triangles, invocations = 1) in; -layout (triangle_strip, max_vertices = 4) out; - -void main(void) -{ - vec4 topLeft = gl_in[0].gl_Position; - vec4 right = gl_in[1].gl_Position; - vec4 bottomLeft = gl_in[2].gl_Position; - - vec4 topRight = vec4( - right.x, - topLeft.y, - topLeft.z, - topLeft.w - ); - - vec4 bottomRight = vec4( - right.x, - bottomLeft.y, - topLeft.z, - topLeft.w - ); - - - gl_Position = topLeft; - EmitVertex(); - - gl_Position = bottomLeft; - EmitVertex(); - - gl_Position = topRight; - EmitVertex(); - - gl_Position = bottomRight; - EmitVertex(); - - EndPrimitive(); -} diff --git a/hw/amdgpu/include/amdgpu/RemoteMemory.hpp b/hw/amdgpu/include/amdgpu/RemoteMemory.hpp deleted file mode 100644 index efcecfe..0000000 --- a/hw/amdgpu/include/amdgpu/RemoteMemory.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include - -namespace amdgpu { -struct RemoteMemory { - int vmId; - - template T *getPointer(std::uint64_t address) const { - return address ? reinterpret_cast( - static_cast(vmId) << 40 | address) - : nullptr; - } -}; -} // namespace amdgpu diff --git a/hw/amdgpu/include/util/SourceLocation.hpp b/hw/amdgpu/include/util/SourceLocation.hpp deleted file mode 100644 index 1275fbf..0000000 --- a/hw/amdgpu/include/util/SourceLocation.hpp +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -namespace util { -class SourceLocation { -public: - const char *mFileName = {}; - const char *mFunctionName = {}; - unsigned mLine = 0; - unsigned mColumn = 0; - -public: - constexpr SourceLocation(const char *fileName = __builtin_FILE(), - const char *functionName = __builtin_FUNCTION(), - unsigned line = __builtin_LINE(), - unsigned column = -#if __has_builtin(__builtin_COLUMN) - __builtin_COLUMN() -#else - 0 -#endif - ) noexcept - : mFileName(fileName), mFunctionName(functionName), mLine(line), - mColumn(column) { - } - - constexpr unsigned line() const noexcept { return mLine; } - constexpr unsigned column() const noexcept { return mColumn; } - constexpr const char *file_name() const noexcept { return mFileName; } - constexpr const char *function_name() const noexcept { return mFunctionName; } -}; -} // namespace util diff --git a/hw/amdgpu/include/util/Verify.hpp b/hw/amdgpu/include/util/Verify.hpp deleted file mode 100644 index ba40e82..0000000 --- a/hw/amdgpu/include/util/Verify.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "SourceLocation.hpp" -#include "unreachable.hpp" - -class Verify { - util::SourceLocation mLocation; - -public: - util::SourceLocation location() const { return mLocation; } - - Verify(util::SourceLocation location = util::SourceLocation()) - : mLocation(location) {} - - Verify &operator<<(bool result) { - if (!result) { - util::unreachable("Verification failed at %s: %s:%u:%u", - mLocation.function_name(), mLocation.file_name(), - mLocation.line(), mLocation.column()); - } - - return *this; - } -}; diff --git a/hw/amdgpu/include/util/VerifyVulkan.hpp b/hw/amdgpu/include/util/VerifyVulkan.hpp deleted file mode 100644 index 4665990..0000000 --- a/hw/amdgpu/include/util/VerifyVulkan.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "Verify.hpp" -#include - -inline Verify operator<<(Verify lhs, VkResult result) { - if (result < VK_SUCCESS) { - auto location = lhs.location(); - util::unreachable("Verification failed at %s: %s:%u:%u(res = %d)", - location.function_name(), location.file_name(), - location.line(), location.column(), result); - } - - return lhs; -} diff --git a/hw/amdgpu/include/util/area.hpp b/hw/amdgpu/include/util/area.hpp deleted file mode 100644 index 898870d..0000000 --- a/hw/amdgpu/include/util/area.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include - -namespace util { -using namespace rx; -} // namespace util diff --git a/hw/amdgpu/include/util/unreachable.hpp b/hw/amdgpu/include/util/unreachable.hpp deleted file mode 100644 index 64e8396..0000000 --- a/hw/amdgpu/include/util/unreachable.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "SourceLocation.hpp" -#include -#include - -namespace util { -[[noreturn]] inline void unreachable_impl() { - std::fflush(stdout); - __builtin_trap(); -} - -[[noreturn]] inline void unreachable(SourceLocation location = {}) { - std::printf("\n"); - std::fflush(stdout); - std::fprintf(stderr, "Unreachable at %s:%u:%u %s\n", location.file_name(), - location.line(), location.column(), location.function_name()); - unreachable_impl(); -} - -[[noreturn]] inline void unreachable(const char *fmt, ...) { - std::printf("\n"); - std::fflush(stdout); - va_list list; - va_start(list, fmt); - std::vfprintf(stderr, fmt, list); - va_end(list); - std::fprintf(stderr, "\n"); - - unreachable_impl(); -} -} // namespace util diff --git a/hw/amdgpu/lib/libspirv/CMakeLists.txt b/hw/amdgpu/lib/libspirv/CMakeLists.txt deleted file mode 100644 index 12f815c..0000000 --- a/hw/amdgpu/lib/libspirv/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -project(spirv) - -add_library(${PROJECT_NAME} INTERFACE) -target_include_directories(${PROJECT_NAME} INTERFACE include) diff --git a/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h b/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h deleted file mode 100644 index 54cc00e..0000000 --- a/hw/amdgpu/lib/libspirv/include/spirv/GLSL.std.450.h +++ /dev/null @@ -1,131 +0,0 @@ -/* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -#ifndef GLSLstd450_H -#define GLSLstd450_H - -static const int GLSLstd450Version = 100; -static const int GLSLstd450Revision = 3; - -enum GLSLstd450 { - GLSLstd450Bad = 0, // Don't use - - GLSLstd450Round = 1, - GLSLstd450RoundEven = 2, - GLSLstd450Trunc = 3, - GLSLstd450FAbs = 4, - GLSLstd450SAbs = 5, - GLSLstd450FSign = 6, - GLSLstd450SSign = 7, - GLSLstd450Floor = 8, - GLSLstd450Ceil = 9, - GLSLstd450Fract = 10, - - GLSLstd450Radians = 11, - GLSLstd450Degrees = 12, - GLSLstd450Sin = 13, - GLSLstd450Cos = 14, - GLSLstd450Tan = 15, - GLSLstd450Asin = 16, - GLSLstd450Acos = 17, - GLSLstd450Atan = 18, - GLSLstd450Sinh = 19, - GLSLstd450Cosh = 20, - GLSLstd450Tanh = 21, - GLSLstd450Asinh = 22, - GLSLstd450Acosh = 23, - GLSLstd450Atanh = 24, - GLSLstd450Atan2 = 25, - - GLSLstd450Pow = 26, - GLSLstd450Exp = 27, - GLSLstd450Log = 28, - GLSLstd450Exp2 = 29, - GLSLstd450Log2 = 30, - GLSLstd450Sqrt = 31, - GLSLstd450InverseSqrt = 32, - - GLSLstd450Determinant = 33, - GLSLstd450MatrixInverse = 34, - - GLSLstd450Modf = 35, // second operand needs an OpVariable to write to - GLSLstd450ModfStruct = 36, // no OpVariable operand - GLSLstd450FMin = 37, - GLSLstd450UMin = 38, - GLSLstd450SMin = 39, - GLSLstd450FMax = 40, - GLSLstd450UMax = 41, - GLSLstd450SMax = 42, - GLSLstd450FClamp = 43, - GLSLstd450UClamp = 44, - GLSLstd450SClamp = 45, - GLSLstd450FMix = 46, - GLSLstd450IMix = 47, // Reserved - GLSLstd450Step = 48, - GLSLstd450SmoothStep = 49, - - GLSLstd450Fma = 50, - GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to - GLSLstd450FrexpStruct = 52, // no OpVariable operand - GLSLstd450Ldexp = 53, - - GLSLstd450PackSnorm4x8 = 54, - GLSLstd450PackUnorm4x8 = 55, - GLSLstd450PackSnorm2x16 = 56, - GLSLstd450PackUnorm2x16 = 57, - GLSLstd450PackHalf2x16 = 58, - GLSLstd450PackDouble2x32 = 59, - GLSLstd450UnpackSnorm2x16 = 60, - GLSLstd450UnpackUnorm2x16 = 61, - GLSLstd450UnpackHalf2x16 = 62, - GLSLstd450UnpackSnorm4x8 = 63, - GLSLstd450UnpackUnorm4x8 = 64, - GLSLstd450UnpackDouble2x32 = 65, - - GLSLstd450Length = 66, - GLSLstd450Distance = 67, - GLSLstd450Cross = 68, - GLSLstd450Normalize = 69, - GLSLstd450FaceForward = 70, - GLSLstd450Reflect = 71, - GLSLstd450Refract = 72, - - GLSLstd450FindILsb = 73, - GLSLstd450FindSMsb = 74, - GLSLstd450FindUMsb = 75, - - GLSLstd450InterpolateAtCentroid = 76, - GLSLstd450InterpolateAtSample = 77, - GLSLstd450InterpolateAtOffset = 78, - - GLSLstd450NMin = 79, - GLSLstd450NMax = 80, - GLSLstd450NClamp = 81, - - GLSLstd450Count -}; - -#endif // #ifndef GLSLstd450_H diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp deleted file mode 100644 index 9d68200..0000000 --- a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp +++ /dev/null @@ -1,2248 +0,0 @@ -#pragma once - -#include "spirv.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace spirv { -struct Id { - unsigned id{}; - - Id() = default; - explicit Id(unsigned value) : id(value) {} - - explicit operator unsigned() const { - assert(id != 0); - return id; - } - explicit operator bool() const { return id != 0; } - - bool operator==(Id other) const { return id == other.id; } - bool operator!=(Id other) const { return id != other.id; } - bool operator<(Id other) const { return id < other.id; } - bool operator>(Id other) const { return id > other.id; } - bool operator<=(Id other) const { return id <= other.id; } - bool operator>=(Id other) const { return id >= other.id; } -}; - -struct Type : Id {}; -struct ScalarType : Type {}; -struct VoidType : Type {}; -struct BoolType : ScalarType {}; -struct IntType : ScalarType {}; -struct SIntType : IntType {}; -struct UIntType : IntType {}; -struct FloatType : ScalarType {}; -struct VectorType : Type {}; -struct MatrixType : Type {}; -struct SamplerType : Type {}; -struct ImageType : Type {}; -struct SampledImageType : Type {}; -struct ArrayType : Type {}; -struct RuntimeArrayType : Type {}; -struct StructType : Type {}; -struct PointerType : Type {}; -struct FunctionType : Type {}; - -struct ExtInstSet : Id {}; -struct Function : Id {}; -struct Block : Id {}; -struct Value : Id {}; - -struct BoolValue : Value {}; -struct IntValue : Value {}; -struct SIntValue : IntValue {}; -struct UIntValue : IntValue {}; -struct FloatValue : Value {}; -struct StructValue : Value {}; -struct PointerValue : Value {}; -struct VectorValue : Value {}; -struct ArrayValue : Value {}; -struct SamplerValue : Value {}; -struct ImageValue : Value {}; -struct SampledImageValue : Value {}; - -template - requires(std::is_base_of_v) -struct ConstantValue : T {}; - -struct AnyConstantValue : Value { - AnyConstantValue() = default; - - template AnyConstantValue(ConstantValue specialization) { - id = specialization.id; - } - - template - AnyConstantValue &operator=(ConstantValue specialization) { - id = specialization.id; - return *this; - } - - template explicit operator ConstantValue() { - ConstantValue result; - result.id = id; - return result; - } -}; - -template - requires(std::is_base_of_v) -struct VectorOfType : VectorType {}; - -template - requires(std::is_base_of_v) -struct ArrayOfType : ArrayType {}; - -template - requires(std::is_base_of_v) -struct VectorOfValue : VectorValue {}; - -template - requires(std::is_base_of_v) -struct ArrayOfValue : ArrayValue {}; - -template - requires(std::is_base_of_v) -struct PointerToType : PointerType {}; - -template - requires(std::is_base_of_v) -struct PointerToValue : PointerValue {}; - -struct StructPointerValue : Value {}; - -struct VariableValue : PointerValue {}; - -namespace detail { -template struct TypeToValueImpl; - -template <> struct TypeToValueImpl { - using type = Value; -}; -template <> struct TypeToValueImpl { - using type = BoolValue; -}; -template <> struct TypeToValueImpl { - using type = IntValue; -}; -template <> struct TypeToValueImpl { - using type = SIntValue; -}; -template <> struct TypeToValueImpl { - using type = UIntValue; -}; -template <> struct TypeToValueImpl { - using type = FloatValue; -}; -template <> struct TypeToValueImpl { - using type = StructValue; -}; -template <> struct TypeToValueImpl { - using type = PointerValue; -}; -template <> struct TypeToValueImpl { - using type = PointerValue; -}; -template <> struct TypeToValueImpl { - using type = VectorValue; -}; -template <> struct TypeToValueImpl { - using type = ArrayValue; -}; -template <> struct TypeToValueImpl { - using type = SamplerValue; -}; -template <> struct TypeToValueImpl { - using type = ImageValue; -}; -template <> struct TypeToValueImpl { - using type = SampledImageValue; -}; - -template struct TypeToValueImpl> { - using type = PointerToValue; -}; -template struct TypeToValueImpl> { - using type = VectorOfValue; -}; - -template struct TypeToValueImpl> { - using type = ArrayOfValue; -}; -} // namespace detail - -template -using TypeToValue = typename detail::TypeToValueImpl::type; - -template - requires(std::is_base_of_v) -struct ScalarOrVectorOfValue : Value { - ScalarOrVectorOfValue() = default; - - ScalarOrVectorOfValue(TypeToValue scalar) { id = scalar.id; } - ScalarOrVectorOfValue(VectorOfValue vector) { id = vector.id; } -}; - -using ConstantBool = ConstantValue; -using ConstantSInt = ConstantValue; -using ConstantUInt = ConstantValue; -using ConstantInt = ConstantValue; -using ConstantFloat = ConstantValue; - -template - requires(std::is_base_of_v && std::is_base_of_v) -ToT cast(FromT from) { - ToT result; - result.id = from.id; - return result; -} - -inline unsigned calcStringWordCount(std::string_view string) { - return (string.length() + 1 + (sizeof(std::uint32_t) - 1)) / - sizeof(std::uint32_t); -} - -using IdUsesTackerType = - std::unordered_map>; -using IdDefTackerType = std::unordered_map; - -class RegionPusher { - IdUsesTackerType *mIdUses = nullptr; - IdDefTackerType *mIdDefs = nullptr; - std::uint32_t *mBeginPtr = nullptr; - std::uint32_t *mPtr = nullptr; - std::size_t mCount = 0; - - RegionPusher &operator=(const RegionPusher &) = default; - -public: - RegionPusher() = default; - RegionPusher(const RegionPusher &) = delete; - RegionPusher(std::uint32_t *beginPtr, std::uint32_t *ptr, std::size_t count, - IdUsesTackerType *idUses, IdDefTackerType *idDefs) - : mIdUses(idUses), mIdDefs(idDefs), mBeginPtr(beginPtr), mPtr(ptr), - mCount(count) {} - RegionPusher(RegionPusher &&other) { *this = std::move(other); } - - RegionPusher &operator=(RegionPusher &&other) { - *this = other; - other.mCount = 0; - return *this; - } - - ~RegionPusher() { assert(mCount == 0); } - - void pushWord(unsigned word) { - assert(mCount > 0); - *mPtr++ = word; - --mCount; - } - - void pushIdDef(Id id) { - assert(id); - (*mIdDefs)[id.id] = mPtr - mBeginPtr; - pushWord(id.id); - } - - void pushIdUse(Id id) { - assert(id); - (*mIdUses)[id.id].push_back(mPtr - mBeginPtr); - - pushWord(id.id); - } - - void pushString(std::string_view string) { - auto nwords = calcStringWordCount(string); - assert(mCount >= nwords); - - auto dst = reinterpret_cast(mPtr); - std::memcpy(dst, string.data(), string.length()); - std::memset(dst + string.length(), 0, - nwords * sizeof(std::uint32_t) - string.length()); - mPtr += nwords; - mCount -= nwords; - } -}; - -struct IdGenerator { - std::uint32_t bounds = 1; - - template - requires(std::is_base_of_v) - T newId() { - T result; - result.id = bounds++; - return result; - } - - Id newId() { - Id result; - result.id = bounds++; - return result; - } - - void reset() { bounds = 1; } -}; - -class RegionPoint { - const std::vector *mData = nullptr; - std::size_t mOffset = 0; - -public: - RegionPoint() = default; - RegionPoint(const std::vector *data, std::size_t offset) - : mData(data), mOffset(offset) {} - - std::span operator-(RegionPoint other) const { - assert(mData == other.mData); - assert(mOffset >= other.mOffset); - - return {other.mData->data() + other.mOffset, mData->data() + mOffset}; - } -}; - -class Region { - std::vector mData; - IdUsesTackerType mIdUses; - IdDefTackerType mIdDefs; - -public: - Region() = default; - Region(std::size_t expInstCount) { mData.reserve(expInstCount); } - - bool isIdDefined(Id id) const { return mIdDefs.contains(id.id); } - bool isIdUsed(Id id) const { return mIdUses.contains(id.id); } - - void clear() { mData.clear(); } - - const std::uint32_t *data() const { return mData.data(); } - std::size_t size() const { return mData.size(); } - - RegionPoint getCurrentPosition() const { return {&mData, mData.size()}; } - - RegionPusher pushOp(spv::Op op, unsigned wordCount) { - assert(wordCount >= 1); - auto offset = mData.size(); - mData.resize(mData.size() + wordCount); - RegionPusher pusher(mData.data(), mData.data() + offset, wordCount, - &mIdUses, &mIdDefs); - pusher.pushWord((static_cast(op) & spv::OpCodeMask) | - (wordCount << spv::WordCountShift)); - - return pusher; - } - - void pushRegion(const Region &other) { - auto offset = mData.size(); - mData.resize(mData.size() + other.size()); - std::memcpy(mData.data() + offset, other.data(), - other.size() * sizeof(std::uint32_t)); - - for (auto &[id, def] : other.mIdDefs) { - mIdDefs[id] = offset + def; - } - - for (auto &[id, uses] : other.mIdUses) { - auto &idUses = mIdUses[id]; - idUses.reserve(idUses.size() + uses.size()); - - for (auto use : uses) { - idUses.push_back(offset + use); - } - } - } - - void recreateDefs(std::unordered_map &remap, - IdGenerator &generator) { - auto prevDefs = std::move(mIdDefs); - mIdDefs = {}; - - for (auto [id, def] : prevDefs) { - auto newId = generator.newId().id; - - remap[id] = newId; - mData[def] = newId; - mIdDefs[newId] = def; - } - } - - void - remapUses(const std::unordered_map &remap) { - auto prevUses = std::move(mIdUses); - mIdUses = {}; - - for (auto &[id, uses] : prevUses) { - auto it = remap.find(id); - assert(it != remap.end()); - auto newId = it->second; - - for (auto &use : uses) { - mData[use] = newId; - } - - mIdUses[newId] = std::move(uses); - } - } -}; - -class BlockBuilder { - IdGenerator *mIdGenerator = nullptr; - - template auto newId() -> decltype(mIdGenerator->newId()) { - return mIdGenerator->newId(); - } - -public: - Block id; - Region prefix; - Region phiRegion; - Region variablesRegion; - Region bodyRegion; - Region terminatorRegion; - - BlockBuilder() = default; - BlockBuilder(IdGenerator &idGenerator, Block id, - std::size_t expInstructionsCount) - : mIdGenerator(&idGenerator), id(id), bodyRegion{expInstructionsCount}, - terminatorRegion{1} {} - - void moveBlock(BlockBuilder &&other) { - prefix.pushRegion(other.prefix); - { - auto region = prefix.pushOp(spv::Op::OpLabel, 2); - region.pushIdDef(id); - } - prefix.pushRegion(phiRegion); - prefix.pushRegion(bodyRegion); - prefix.pushRegion(terminatorRegion); - - id = other.id; - phiRegion = std::move(other.phiRegion); - variablesRegion.pushRegion(other.variablesRegion); - bodyRegion = std::move(other.bodyRegion); - terminatorRegion = std::move(other.terminatorRegion); - } - - Value createExtInst(Type resultType, ExtInstSet set, - std::uint32_t instruction, - std::span operands) { - auto region = bodyRegion.pushOp(spv::Op::OpExtInst, 5 + operands.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(set); - region.pushWord(instruction); - for (auto operand : operands) { - region.pushIdUse(operand); - } - return id; - } - - VariableValue createVariable(Type type, spv::StorageClass storageClass, - std::optional initializer = {}) { - auto region = variablesRegion.pushOp(spv::Op::OpVariable, - 4 + (initializer.has_value() ? 1 : 0)); - auto id = newId(); - region.pushIdUse(type); - region.pushIdDef(id); - region.pushWord(static_cast(storageClass)); - if (initializer.has_value()) { - region.pushIdUse(initializer.value()); - } - return id; - } - - Value createFunctionCall(Type resultType, Function function, - std::span arguments) { - auto region = - bodyRegion.pushOp(spv::Op::OpFunctionCall, 4 + arguments.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(function); - for (auto argument : arguments) { - region.pushIdUse(argument); - } - return id; - } - - // composite - Value createVectorExtractDynamic(Type resultType, Value vector, - IntValue index) { - auto region = bodyRegion.pushOp(spv::Op::OpVectorExtractDynamic, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(vector); - region.pushIdUse(index); - return id; - } - - Value createVectorInsertDynamic(Type resultType, Value vector, - Value component, IntValue index) { - auto region = bodyRegion.pushOp(spv::Op::OpVectorInsertDynamic, 6); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(vector); - region.pushIdUse(component); - region.pushIdUse(index); - return id; - } - - Value createVectorShuffle(Type resultType, Value vector1, Value vector2, - std::span components) { - auto region = - bodyRegion.pushOp(spv::Op::OpVectorShuffle, 5 + components.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(vector1); - region.pushIdUse(vector2); - - for (auto component : components) { - region.pushWord(component); - } - return id; - } - - template - TypeToValue createCompositeConstruct(T resultType, - std::span constituents) { - auto region = bodyRegion.pushOp(spv::Op::OpCompositeConstruct, - 3 + constituents.size()); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - - for (auto constituent : constituents) { - region.pushIdUse(constituent); - } - return id; - } - - Value createCompositeExtract(Type resultType, Value composite, - std::span indexes) { - auto region = - bodyRegion.pushOp(spv::Op::OpCompositeExtract, 4 + indexes.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(composite); - - for (auto index : indexes) { - region.pushWord(index); - } - return id; - } - - // arithmetic - template - requires(std::is_base_of_v) - TypeToValue createInst(spv::Op op, T resultType, - std::span> operands) { - auto region = bodyRegion.pushOp(op, 3 + operands.size()); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - for (auto operand : operands) { - region.pushIdUse(operand); - } - return id; - } - - Value createInst(spv::Op op, Type resultType, - std::span operands) { - auto region = bodyRegion.pushOp(op, 3 + operands.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - for (auto operand : operands) { - region.pushIdUse(operand); - } - return id; - } - - template - VectorOfValue createInst(spv::Op op, VectorOfType resultType, - std::span> operands) { - auto region = bodyRegion.pushOp(op, 3 + operands.size()); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - for (auto operand : operands) { - region.pushIdUse(operand); - } - return id; - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createSNegate(T resultType, TypeToValue operand) { - return createInst(spv::Op::OpSNegate, resultType, std::array{operand}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFNegate(T resultType, TypeToValue operand) { - return createInst(spv::Op::OpFNegate, resultType, std::array{operand}); - } - - template - requires(std::is_same_v || std::is_base_of_v || - std::is_same_v, T> || - std::is_same_v, T> || - std::is_same_v, T>) - TypeToValue createIAdd(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpIAdd, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFAdd(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFAdd, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || std::is_base_of_v || - std::is_same_v, T> || - std::is_same_v, T> || - std::is_same_v, T>) - TypeToValue createISub(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpISub, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFSub(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFSub, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || std::is_base_of_v || - std::is_same_v, T> || - std::is_same_v, T> || - std::is_same_v, T>) - TypeToValue createIMul(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpIMul, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFMul(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFMul, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createUDiv(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpUDiv, resultType, - std::array{operand1, operand2}); - } - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createSDiv(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpSDiv, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFDiv(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFDiv, resultType, - std::array{operand1, operand2}); - } - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createUMod(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpUMod, resultType, - std::array{operand1, operand2}); - } - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createSRem(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpSRem, resultType, - std::array{operand1, operand2}); - } - - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createSMod(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpSMod, resultType, - std::array{operand1, operand2}); - } - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFRem(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFRem, resultType, - std::array{operand1, operand2}); - } - template - requires(std::is_same_v || - std::is_same_v, T>) - TypeToValue createFMod(T resultType, TypeToValue operand1, - TypeToValue operand2) { - return createInst(spv::Op::OpFMod, resultType, - std::array{operand1, operand2}); - } - - Value createIAddCarry(Type resultType, Value operand1, Value operand2) { - return createInst(spv::Op::OpIAddCarry, resultType, - std::array{operand1, operand2}); - } - - Value createISubBorrow(Type resultType, Value operand1, Value operand2) { - return createInst(spv::Op::OpISubBorrow, resultType, - std::array{operand1, operand2}); - } - - Value createUMulExtended(Type resultType, Value operand1, Value operand2) { - return createInst(spv::Op::OpUMulExtended, resultType, - std::array{operand1, operand2}); - } - - Value createSMulExtended(Type resultType, Value operand1, Value operand2) { - return createInst(spv::Op::OpSMulExtended, resultType, - std::array{operand1, operand2}); - } - - Value createPhi(Type resultType, - std::span> values) { - auto region = phiRegion.pushOp(spv::Op::OpPhi, 3 + values.size() * 2); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - for (auto [variable, block] : values) { - region.pushIdUse(variable); - region.pushIdUse(block); - } - return id; - } - - void addBlockToPhis(spirv::Block block, - std::span values) { - auto phi = phiRegion.data(); - spirv::Region newPhi(phiRegion.size() * 2); - - assert(block); - - for (std::size_t i = 0, end = phiRegion.size(), index = 0; i < end; - index++) { - auto opWordCount = phi[i]; - - assert(static_cast(static_cast(opWordCount) & - spv::OpCodeMask) == spv::Op::OpPhi); - auto wordCount = - static_cast(opWordCount) >> spv::WordCountShift; - auto newOp = newPhi.pushOp(spv::Op::OpPhi, wordCount + 2); - - for (std::size_t j = 1; j < wordCount; ++j) { - newOp.pushWord(phi[i + j]); - } - - i += wordCount; - - assert(index < values.size()); - assert(values[index]); - - newOp.pushIdUse(values[index]); - newOp.pushIdUse(block); - } - - phiRegion = std::move(newPhi); - } - - void moveVariablesFrom(BlockBuilder &otherBlock) { - variablesRegion.pushRegion(otherBlock.variablesRegion); - otherBlock.variablesRegion.clear(); - } - - template - requires(std::is_base_of_v) - TypeToValue createPhi(T resultType, - std::span> values) { - return cast>( - createPhi(static_cast(resultType), values)); - } - - void createLoopMerge(Block mergeBlock, Block continueTarget, - spv::LoopControlMask loopControl, - std::span loopControlParameters) { - auto region = terminatorRegion.pushOp(spv::Op::OpLoopMerge, - 4 + loopControlParameters.size()); - region.pushIdUse(mergeBlock); - region.pushIdUse(continueTarget); - region.pushWord(static_cast(loopControl)); - - for (auto loopControlParameter : loopControlParameters) { - region.pushWord(static_cast(loopControlParameter)); - } - } - - void createSelectionMerge(Block mergeBlock, - spv::SelectionControlMask selectionControl) { - auto region = terminatorRegion.pushOp(spv::Op::OpSelectionMerge, 3); - region.pushIdUse(mergeBlock); - region.pushWord(static_cast(selectionControl)); - } - - void createBranch(Block label) { - auto region = terminatorRegion.pushOp(spv::Op::OpBranch, 2); - region.pushIdUse(label); - } - - void createBranchConditional( - BoolValue condition, Block trueLabel, Block falseLabel, - std::optional> weights = {}) { - auto region = terminatorRegion.pushOp(spv::Op::OpBranchConditional, - 4 + (weights.has_value() ? 1 : 0)); - region.pushIdUse(condition); - region.pushIdUse(trueLabel); - region.pushIdUse(falseLabel); - - if (weights.has_value()) { - region.pushWord(weights->first); - region.pushWord(weights->second); - } - } - - void createKill() { - assert(terminatorRegion.size() == 0); - terminatorRegion.pushOp(spv::Op::OpKill, 1); - } - - void createReturn() { - assert(terminatorRegion.size() == 0); - terminatorRegion.pushOp(spv::Op::OpReturn, 1); - } - - void createReturnValue(Value value) { - assert(terminatorRegion.size() == 0); - auto region = terminatorRegion.pushOp(spv::Op::OpReturnValue, 2); - region.pushIdUse(value); - } - - void createUnreachable() { - assert(terminatorRegion.size() == 0); - terminatorRegion.pushOp(spv::Op::OpUnreachable, 1); - } - - Value createLoad(Type resultType, PointerValue pointer, - spv::MemoryAccessMask memoryAccess, - std::span memoryAccessOperands) { - auto region = - bodyRegion.pushOp(spv::Op::OpLoad, 5 + memoryAccessOperands.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(pointer); - region.pushWord(static_cast(memoryAccess)); - - for (auto memoryAccessOperand : memoryAccessOperands) { - region.pushWord(static_cast(memoryAccessOperand)); - } - - return id; - } - - template - requires(std::is_base_of_v) - TypeToValue createLoad(T resultType, PointerValue pointer) { - auto region = bodyRegion.pushOp(spv::Op::OpLoad, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(pointer); - return id; - } - - void createStore(PointerValue pointer, Value object) { - auto region = bodyRegion.pushOp(spv::Op::OpStore, 3); - region.pushIdUse(pointer); - region.pushIdUse(object); - } - - void createStore(PointerValue pointer, Value object, - spv::MemoryAccessMask memoryAccess, - std::span memoryAccessOperands) { - auto region = - bodyRegion.pushOp(spv::Op::OpStore, 4 + memoryAccessOperands.size()); - region.pushIdUse(pointer); - region.pushIdUse(object); - region.pushWord(static_cast(memoryAccess)); - - for (auto memoryAccessOperand : memoryAccessOperands) { - region.pushWord(memoryAccessOperand); - } - } - - void createCopyMemory(PointerValue targetPointer, - PointerValue sourcePointer) { - auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, 3); - region.pushIdUse(targetPointer); - region.pushIdUse(sourcePointer); - } - - void createCopyMemory(PointerValue targetPointer, PointerValue sourcePointer, - spv::MemoryAccessMask memoryAccess, - std::span memoryAccessOperands) { - auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, - 4 + memoryAccessOperands.size()); - region.pushIdUse(targetPointer); - region.pushIdUse(sourcePointer); - region.pushWord(static_cast(memoryAccess)); - for (auto memoryAccessOperand : memoryAccessOperands) { - region.pushWord(memoryAccessOperand); - } - } - - void - createCopyMemory(PointerValue targetPointer, PointerValue sourcePointer, - spv::MemoryAccessMask targetMemoryAccess, - std::span targetMemoryAccessOperands, - spv::MemoryAccessMask sourceMemoryAccess, - std::span sourceMemoryAccessOperands) { - auto region = bodyRegion.pushOp(spv::Op::OpCopyMemory, - 5 + targetMemoryAccessOperands.size() + - sourceMemoryAccessOperands.size()); - region.pushIdUse(targetPointer); - region.pushIdUse(sourcePointer); - region.pushWord(static_cast(targetMemoryAccess)); - for (auto memoryAccessOperand : targetMemoryAccessOperands) { - region.pushWord(static_cast(memoryAccessOperand)); - } - region.pushWord(static_cast(sourceMemoryAccess)); - for (auto memoryAccessOperand : sourceMemoryAccessOperands) { - region.pushWord(static_cast(memoryAccessOperand)); - } - } - - UIntValue createArrayLength(UIntType resultType, - PointerToValue structure, - std::uint32_t member) { - auto region = bodyRegion.pushOp(spv::Op::OpArrayLength, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(structure); - region.pushWord(member); - return id; - } - - BoolValue createPtrEqual(BoolType resultType, PointerValue operand1, - PointerValue operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpPtrEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - BoolValue createPtrNotEqual(BoolType resultType, PointerValue operand1, - PointerValue operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpPtrNotEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - PointerValue createAccessChain(PointerType resultType, PointerValue base, - std::span indices) { - auto region = bodyRegion.pushOp(spv::Op::OpAccessChain, 4 + indices.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(base); - - for (auto index : indices) { - region.pushIdUse(index); - } - return id; - } - - PointerValue createInBoundsAccessChain(PointerType resultType, - PointerValue base, - std::span indices) { - auto region = - bodyRegion.pushOp(spv::Op::OpInBoundsAccessChain, 4 + indices.size()); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(base); - - for (auto index : indices) { - region.pushIdUse(index); - } - return id; - } - - // conversion - Value createConvertFToU(Type resultType, Value operand) { - auto region = bodyRegion.pushOp(spv::Op::OpConvertFToU, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - Value createConvertFToS(Type resultType, Value operand) { - auto region = bodyRegion.pushOp(spv::Op::OpConvertFToS, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - template - TypeToValue createConvertSToF(T resultType, SIntValue operand) { - auto region = bodyRegion.pushOp(spv::Op::OpConvertSToF, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - template - TypeToValue createConvertUToF(T resultType, UIntValue operand) { - auto region = bodyRegion.pushOp(spv::Op::OpConvertUToF, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - template - TypeToValue createUConvert(T resultType, UIntValue operand) { - auto region = bodyRegion.pushOp(spv::Op::OpUConvert, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - template - TypeToValue createSConvert(T resultType, SIntValue operand) { - auto region = bodyRegion.pushOp(spv::Op::OpSConvert, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - Value createFConvert(Type resultType, Value operand) { - auto region = bodyRegion.pushOp(spv::Op::OpFConvert, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - template - TypeToValue createBitcast(T resultType, Value operand) { - auto region = bodyRegion.pushOp(spv::Op::OpBitcast, 4); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - - // bit - template - TypeToValue createShiftRightLogical(T resultType, TypeToValue base, - IntValue shift) { - auto region = bodyRegion.pushOp(spv::Op::OpShiftRightLogical, 5); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(base); - region.pushIdUse(shift); - return id; - } - - template - TypeToValue createShiftRightArithmetic(T resultType, TypeToValue base, - IntValue shift) { - auto region = bodyRegion.pushOp(spv::Op::OpShiftRightArithmetic, 5); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(base); - region.pushIdUse(shift); - return id; - } - - template - TypeToValue createShiftLeftLogical(T resultType, TypeToValue base, - IntValue shift) { - auto region = bodyRegion.pushOp(spv::Op::OpShiftLeftLogical, 5); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(base); - region.pushIdUse(shift); - return id; - } - - Value createBitwiseOr(Type resultType, Value operand1, Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpBitwiseOr, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - Value createBitwiseXor(Type resultType, Value operand1, Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpBitwiseXor, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - Value createBitwiseAnd(Type resultType, Value operand1, Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpBitwiseAnd, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - Value createNot(Type resultType, Value operand) { - auto region = bodyRegion.pushOp(spv::Op::OpNot, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - - // relational - BoolValue createIsNan(BoolType resultType, Value operand1) { - auto region = bodyRegion.pushOp(spv::Op::OpIsNan, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - return id; - } - - BoolValue createIsInf(BoolType resultType, Value operand1) { - auto region = bodyRegion.pushOp(spv::Op::OpIsInf, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - return id; - } - - // logic - BoolValue createLogicalEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpLogicalEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createLogicalNotEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpLogicalNotEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - BoolValue createLogicalOr(BoolType resultType, BoolValue operand1, - BoolValue operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpLogicalOr, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - BoolValue createLogicalAnd(BoolType resultType, BoolValue operand1, - BoolValue operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpLogicalAnd, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - BoolValue createLogicalNot(BoolType resultType, BoolValue operand) { - auto region = bodyRegion.pushOp(spv::Op::OpLogicalNot, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand); - return id; - } - - template - TypeToValue createSelect(T resultType, BoolValue condition, Value object1, - Value object2) { - auto region = bodyRegion.pushOp(spv::Op::OpSelect, 6); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(condition); - region.pushIdUse(object1); - region.pushIdUse(object2); - return id; - } - - BoolValue createIEqual(BoolType resultType, Value operand1, Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpIEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createINotEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpINotEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createUGreaterThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpUGreaterThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createSGreaterThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpSGreaterThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createUGreaterThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpUGreaterThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createSGreaterThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpSGreaterThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createULessThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpULessThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createSLessThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpSLessThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createULessThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpULessThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createSLessThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpSLessThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - BoolValue createFOrdEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFOrdNotEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdNotEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordNotEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordNotEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFOrdLessThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdLessThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordLessThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordLessThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFOrdLessThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdLessThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordLessThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordLessThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFOrdGreaterThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdGreaterThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordGreaterThan(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordGreaterThan, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFOrdGreaterThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFOrdGreaterThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - BoolValue createFUnordGreaterThanEqual(BoolType resultType, Value operand1, - Value operand2) { - auto region = bodyRegion.pushOp(spv::Op::OpFUnordGreaterThanEqual, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(operand1); - region.pushIdUse(operand2); - return id; - } - - // image - SampledImageValue createSampledImage(SampledImageType resultType, - ImageValue image, SamplerValue sampler) { - auto region = bodyRegion.pushOp(spv::Op::OpSampledImage, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(image); - region.pushIdUse(sampler); - return id; - } - VectorOfValue createImageSampleImplicitLod( - VectorOfType resultType, SampledImageValue sampledImage, - ScalarOrVectorOfValue coords, - spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, - std::span args = {}) { - auto region = bodyRegion.pushOp( - spv::Op::OpImageSampleImplicitLod, - 5 + (operands == spv::ImageOperandsMask::MaskNone ? 0 - : 1 + args.size())); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(sampledImage); - region.pushIdUse(coords); - - if (operands != spv::ImageOperandsMask::MaskNone) { - region.pushWord(static_cast(operands)); - - for (auto arg : args) { - region.pushIdUse(arg); - } - } - - return id; - } - - VectorOfValue createImageSampleExplicitLod( - VectorOfType resultType, SampledImageValue sampledImage, - ScalarOrVectorOfValue coords, - spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, - std::span args = {}) { - auto region = bodyRegion.pushOp( - spv::Op::OpImageSampleExplicitLod, - 5 + (operands == spv::ImageOperandsMask::MaskNone ? 0 - : 1 + args.size())); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(sampledImage); - region.pushIdUse(coords); - - if (operands != spv::ImageOperandsMask::MaskNone) { - region.pushWord(static_cast(operands)); - - for (auto arg : args) { - region.pushIdUse(arg); - } - } - - return id; - } - - VectorOfValue createImageRead( - VectorOfType resultType, ImageValue image, - ScalarOrVectorOfValue coords, - spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, - std::span args = {}) { - auto region = bodyRegion.pushOp( - spv::Op::OpImageRead, - 5 + (operands == spv::ImageOperandsMask::MaskNone ? 0 - : 1 + args.size())); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(image); - region.pushIdUse(coords); - - if (operands != spv::ImageOperandsMask::MaskNone) { - region.pushWord(static_cast(operands)); - - for (auto arg : args) { - region.pushIdUse(arg); - } - } - - return id; - } - - void createImageWrite( - ImageValue image, ScalarOrVectorOfValue coords, Value texel, - spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, - std::span args = {}) { - auto region = bodyRegion.pushOp( - spv::Op::OpImageWrite, - 4 + (operands == spv::ImageOperandsMask::MaskNone ? 0 - : 1 + args.size())); - region.pushIdUse(image); - region.pushIdUse(coords); - region.pushIdUse(texel); - - if (operands != spv::ImageOperandsMask::MaskNone) { - region.pushWord(static_cast(operands)); - - for (auto arg : args) { - region.pushIdUse(arg); - } - } - } - - Value createImageQuerySizeLod(Type resultType, ImageValue image, Value lod) { - auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySizeLod, 5); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(image); - region.pushIdUse(lod); - return id; - } - - Value createImageQuerySize(Type resultType, ImageValue image) { - auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySize, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(image); - return id; - } - - VectorOfValue - createImageQueryLod(VectorOfType resultType, - SampledImageValue sampledImage, - ScalarOrVectorOfValue coords) { - auto region = bodyRegion.pushOp(spv::Op::OpImageQueryLod, 5); - auto id = newId>(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(sampledImage); - region.pushIdUse(coords); - return id; - } - - IntValue createImageQueryLevels(IntType resultType, ImageValue sampledImage) { - auto region = bodyRegion.pushOp(spv::Op::OpImageQueryLevels, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(sampledImage); - return id; - } - - IntValue createImageQuerySamples(IntType resultType, - ImageValue sampledImage) { - auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySamples, 4); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushIdUse(sampledImage); - return id; - } -}; - -class FunctionBuilder { - IdGenerator *mIdGenerator = nullptr; - - template auto newId() -> decltype(mIdGenerator->newId()) { - return mIdGenerator->newId(); - } - -public: - Region paramsRegion; - Region bodyRegion; - Function id; - - FunctionBuilder() = default; - FunctionBuilder(IdGenerator &idGenerator, Function id, - std::size_t expInstructionsCount) - : mIdGenerator(&idGenerator), bodyRegion{expInstructionsCount}, id(id) {} - - Value createFunctionParameter(Type resultType) { - auto region = paramsRegion.pushOp(spv::Op::OpFunctionParameter, 3); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - return id; - } - - BlockBuilder createBlockBuilder(std::size_t expInstructionsCount) { - auto id = newId(); - - return BlockBuilder(*mIdGenerator, id, expInstructionsCount); - } - - void insertBlock(const BlockBuilder &builder) { - bodyRegion.pushRegion(builder.prefix); - - auto region = bodyRegion.pushOp(spv::Op::OpLabel, 2); - region.pushIdDef(builder.id); - - bodyRegion.pushRegion(builder.variablesRegion); - bodyRegion.pushRegion(builder.phiRegion); - bodyRegion.pushRegion(builder.bodyRegion); - bodyRegion.pushRegion(builder.terminatorRegion); - } -}; - -class SpirvBuilder { - IdGenerator *mIdGenerator = nullptr; - - Region capabilityRegion; - Region extensionRegion; - Region extInstRegion; - Region memoryModelRegion; - Region entryPointRegion; - Region executionModeRegion; - Region debugRegion; - Region annotationRegion; - Region globalRegion; - Region functionDeclRegion; - Region functionRegion; - - template auto newId() -> decltype(mIdGenerator->newId()) { - return mIdGenerator->newId(); - } - -private: - SpirvBuilder(const SpirvBuilder &) = default; - SpirvBuilder(SpirvBuilder &&) = default; - SpirvBuilder &operator=(SpirvBuilder &&) = default; - -public: - bool isIdDefined(Id id) const { - std::array regions = { - // &capabilityRegion, &extensionRegion, &extInstRegion, - // &memoryModelRegion, &entryPointRegion, &executionModeRegion, - // &debugRegion, &annotationRegion, &globalRegion, - &functionRegion, - }; - - for (auto reg : regions) { - if (reg->isIdDefined(id)) { - return true; - } - } - - return false; - } - bool isIdUsed(Id id) const { - std::array regions = { - &capabilityRegion, &extensionRegion, &extInstRegion, - &memoryModelRegion, &entryPointRegion, &executionModeRegion, - &debugRegion, &annotationRegion, &globalRegion, - &functionDeclRegion, &functionRegion, - }; - - for (auto reg : regions) { - if (reg->isIdUsed(id)) { - return true; - } - } - - return false; - } - SpirvBuilder() = default; - - SpirvBuilder(IdGenerator &idGenerator, std::size_t expInstructionsCount) - : mIdGenerator(&idGenerator), capabilityRegion{1}, extensionRegion{1}, - extInstRegion{4}, memoryModelRegion{3}, entryPointRegion{1}, - executionModeRegion{1}, debugRegion{0}, annotationRegion{1}, - globalRegion{1}, functionDeclRegion{1}, - functionRegion{expInstructionsCount} {} - - SpirvBuilder clone() const { return *this; } - - void swap(SpirvBuilder &other) { - std::swap(mIdGenerator, other.mIdGenerator); - std::swap(capabilityRegion, other.capabilityRegion); - std::swap(extensionRegion, other.extensionRegion); - std::swap(extInstRegion, other.extInstRegion); - std::swap(memoryModelRegion, other.memoryModelRegion); - std::swap(entryPointRegion, other.entryPointRegion); - std::swap(executionModeRegion, other.executionModeRegion); - std::swap(debugRegion, other.debugRegion); - std::swap(annotationRegion, other.annotationRegion); - std::swap(globalRegion, other.globalRegion); - std::swap(functionDeclRegion, other.functionDeclRegion); - std::swap(functionRegion, other.functionRegion); - } - - void reset() { - mIdGenerator->reset(); - capabilityRegion.clear(); - extensionRegion.clear(); - extInstRegion.clear(); - memoryModelRegion.clear(); - entryPointRegion.clear(); - executionModeRegion.clear(); - debugRegion.clear(); - annotationRegion.clear(); - globalRegion.clear(); - functionDeclRegion.clear(); - functionRegion.clear(); - } - - IdGenerator *getIdGenerator() const { return mIdGenerator; } - - std::vector build(std::uint32_t spirvVersion, - std::uint32_t generatorMagic) { - const std::size_t headerSize = 5; - std::size_t finalSize = headerSize; - - std::array regions = { - &capabilityRegion, &extensionRegion, &extInstRegion, - &memoryModelRegion, &entryPointRegion, &executionModeRegion, - &debugRegion, &annotationRegion, &globalRegion, - &functionDeclRegion, &functionRegion, - }; - - for (auto region : regions) { - finalSize += region->size(); - } - - std::vector result; - result.resize(finalSize); - - result[0] = spv::MagicNumber; - result[1] = spirvVersion; - result[2] = generatorMagic; - result[3] = mIdGenerator->bounds; - result[4] = 0; // instruction schema - - std::size_t currentOffset = headerSize; - - for (auto region : regions) { - std::memcpy(result.data() + currentOffset, region->data(), - region->size() * sizeof(std::uint32_t)); - currentOffset += region->size(); - } - - return result; - } - - // misc - Value createUndef(Type resultType) { - auto region = globalRegion.pushOp(spv::Op::OpUndef, 3); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - return id; - } - - template - requires(std::is_base_of_v) - TypeToValue createUndef(T resultType) { - return cast>(createUndef(resultType)); - } - - // annotation - void createDecorate(Id target, spv::Decoration decoration, - std::span decorationOperands) { - auto region = annotationRegion.pushOp(spv::Op::OpDecorate, - 3 + decorationOperands.size()); - region.pushIdUse(target); - region.pushWord(static_cast(decoration)); - - for (auto decorationOperand : decorationOperands) { - region.pushWord(decorationOperand); - } - } - - void createMemberDecorate(StructType structureType, std::uint32_t member, - spv::Decoration decoration, - std::span decorationOperands) { - auto region = annotationRegion.pushOp(spv::Op::OpMemberDecorate, - 4 + decorationOperands.size()); - region.pushIdUse(structureType); - region.pushWord(member); - region.pushWord(static_cast(decoration)); - - for (auto decorationOperand : decorationOperands) { - region.pushWord(decorationOperand); - } - } - - void createDecorateId(Id target, spv::Decoration decoration, - std::span decorationOperands) { - auto region = annotationRegion.pushOp(spv::Op::OpDecorateId, - 3 + decorationOperands.size()); - region.pushIdUse(target); - region.pushWord(static_cast(decoration)); - - for (auto decorationOperand : decorationOperands) { - region.pushIdUse(decorationOperand); - } - } - - void createDecorateString( - Id target, spv::Decoration decoration, - std::string_view firstDecorationOperand, - std::span decorationOperands = {}) { - std::size_t decorationOperandsLen = - calcStringWordCount(firstDecorationOperand); - - for (auto decorationOperand : decorationOperands) { - decorationOperandsLen += calcStringWordCount(decorationOperand); - } - - auto region = annotationRegion.pushOp(spv::Op::OpDecorateString, - 3 + decorationOperandsLen); - region.pushIdUse(target); - region.pushWord(static_cast(decoration)); - region.pushString(firstDecorationOperand); - - for (auto decorationOperand : decorationOperands) { - region.pushString(decorationOperand); - } - } - - void createMemberDecorateString( - StructType structType, std::uint32_t member, spv::Decoration decoration, - std::string_view firstDecorationOperand, - std::span decorationOperands = {}) { - std::size_t decorationOperandsLen = - calcStringWordCount(firstDecorationOperand); - - for (auto decorationOperand : decorationOperands) { - decorationOperandsLen += calcStringWordCount(decorationOperand); - } - - auto region = annotationRegion.pushOp(spv::Op::OpMemberDecorateString, - 4 + decorationOperandsLen); - region.pushIdUse(structType); - region.pushWord(member); - region.pushWord(static_cast(decoration)); - region.pushString(firstDecorationOperand); - - for (auto decorationOperand : decorationOperands) { - region.pushString(decorationOperand); - } - } - - // extension - void createExtension(std::string_view name) { - auto region = extensionRegion.pushOp(spv::Op::OpExtension, - 1 + calcStringWordCount(name)); - region.pushString(name); - } - - ExtInstSet createExtInstImport(std::string_view name) { - auto region = extInstRegion.pushOp(spv::Op::OpExtInstImport, - 2 + calcStringWordCount(name)); - auto id = newId(); - region.pushIdDef(id); - region.pushString(name); - return id; - } - - // mode set - void createCapability(spv::Capability cap) { - auto region = capabilityRegion.pushOp(spv::Op::OpCapability, 2); - region.pushWord(static_cast(cap)); - } - - void setMemoryModel(spv::AddressingModel addressingModel, - spv::MemoryModel memoryModel) { - memoryModelRegion.clear(); - auto region = memoryModelRegion.pushOp(spv::Op::OpMemoryModel, 3); - region.pushWord(static_cast(addressingModel)); - region.pushWord(static_cast(memoryModel)); - } - - void createEntryPoint(spv::ExecutionModel executionModel, Function entryPoint, - std::string_view name, - std::span interfaces) { - auto region = entryPointRegion.pushOp(spv::Op::OpEntryPoint, - 3 + calcStringWordCount(name) + - interfaces.size()); - region.pushWord(static_cast(executionModel)); - region.pushIdUse(entryPoint); - region.pushString(name); - for (auto iface : interfaces) { - region.pushIdUse(iface); - } - } - void createExecutionMode(Function entryPoint, spv::ExecutionMode mode, - std::span args) { - auto region = - executionModeRegion.pushOp(spv::Op::OpExecutionMode, 3 + args.size()); - region.pushIdUse(entryPoint); - region.pushWord(static_cast(mode)); - for (auto arg : args) { - region.pushWord(arg); - } - } - - void createExecutionModeId(Function entryPoint, spv::ExecutionMode mode, - std::span args) { - auto region = - executionModeRegion.pushOp(spv::Op::OpExecutionModeId, 3 + args.size()); - region.pushIdUse(entryPoint); - region.pushWord(static_cast(mode)); - for (auto arg : args) { - region.pushIdUse(arg); - } - } - - // type - VoidType createTypeVoid() { - auto region = globalRegion.pushOp(spv::Op::OpTypeVoid, 2); - auto id = newId(); - region.pushIdDef(id); - return id; - } - BoolType createTypeBool() { - auto region = globalRegion.pushOp(spv::Op::OpTypeBool, 2); - auto id = newId(); - region.pushIdDef(id); - return id; - } - IntType createTypeInt(std::uint32_t width, bool signedness) { - auto region = globalRegion.pushOp(spv::Op::OpTypeInt, 4); - auto id = newId(); - region.pushIdDef(id); - region.pushWord(width); - region.pushWord(static_cast(signedness)); - return id; - } - SIntType createTypeSInt(std::uint32_t width) { - return cast(createTypeInt(width, true)); - } - UIntType createTypeUInt(std::uint32_t width) { - return cast(createTypeInt(width, false)); - } - FloatType createTypeFloat(std::uint32_t width) { - auto region = globalRegion.pushOp(spv::Op::OpTypeFloat, 3); - auto id = newId(); - region.pushIdDef(id); - region.pushWord(width); - return id; - } - template - VectorOfType createTypeVector(T componentType, - std::uint32_t componentCount) { - auto region = globalRegion.pushOp(spv::Op::OpTypeVector, 4); - auto id = newId>(); - region.pushIdDef(id); - region.pushIdUse(componentType); - region.pushWord(componentCount); - return id; - } - MatrixType createTypeMatrix(VectorType columnType, - std::uint32_t coulumnCount) { - auto region = globalRegion.pushOp(spv::Op::OpTypeMatrix, 4); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(columnType); - region.pushWord(coulumnCount); - return id; - } - - ImageType createTypeImage(Type sampledType, spv::Dim dim, std::uint32_t depth, - std::uint32_t arrayed, std::uint32_t ms, - std::uint32_t sampled, spv::ImageFormat imageFormat, - std::optional access = {}) { - auto region = globalRegion.pushOp(spv::Op::OpTypeImage, - 9 + (access.has_value() ? 1 : 0)); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(sampledType); - region.pushWord(static_cast(dim)); - region.pushWord(depth); - region.pushWord(arrayed); - region.pushWord(ms); - region.pushWord(sampled); - region.pushWord(static_cast(imageFormat)); - - if (access.has_value()) { - region.pushWord(static_cast(*access)); - } - - return id; - } - - SamplerType createTypeSampler() { - auto region = globalRegion.pushOp(spv::Op::OpTypeSampler, 2); - auto id = newId(); - region.pushIdDef(id); - return id; - } - - SampledImageType createTypeSampledImage(ImageType imageType) { - auto region = globalRegion.pushOp(spv::Op::OpTypeSampledImage, 3); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(imageType); - return id; - } - - ArrayType createTypeArray(Type elementType, AnyConstantValue count) { - auto region = globalRegion.pushOp(spv::Op::OpTypeArray, 4); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(elementType); - region.pushIdUse(count); - return id; - } - - RuntimeArrayType createTypeRuntimeArray(Type elementType) { - auto region = globalRegion.pushOp(spv::Op::OpTypeRuntimeArray, 3); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(elementType); - - return id; - } - - StructType createTypeStruct(std::span members) { - auto region = - globalRegion.pushOp(spv::Op::OpTypeStruct, 2 + members.size()); - auto id = newId(); - region.pushIdDef(id); - - for (auto member : members) { - region.pushIdUse(member); - } - - return id; - } - - PointerType createTypePointer(spv::StorageClass storageClass, Type type) { - auto region = globalRegion.pushOp(spv::Op::OpTypePointer, 4); - auto id = newId(); - region.pushIdDef(id); - region.pushWord(static_cast(storageClass)); - region.pushIdUse(type); - return id; - } - - template - requires(std::is_base_of_v) - PointerToType createTypePointer(spv::StorageClass storageClass, T type) { - return cast>( - createTypePointer(storageClass, static_cast(type))); - } - - FunctionType createTypeFunction(Type returnType, - std::span parameters) { - auto region = - globalRegion.pushOp(spv::Op::OpTypeFunction, 3 + parameters.size()); - auto id = newId(); - region.pushIdDef(id); - region.pushIdUse(returnType); - - for (auto param : parameters) { - region.pushIdUse(param); - } - - return id; - } - - // constant - ConstantBool createConstantTrue(BoolType type) { - auto region = globalRegion.pushOp(spv::Op::OpConstantTrue, 3); - auto id = newId(); - region.pushIdUse(type); - region.pushIdDef(id); - return id; - } - - ConstantBool createConstantFalse(BoolType type) { - auto region = globalRegion.pushOp(spv::Op::OpConstantFalse, 3); - auto id = newId(); - region.pushIdUse(type); - region.pushIdDef(id); - return id; - } - - template - requires(std::is_base_of_v) - ConstantValue> - createConstant(T type, std::span values) { - auto region = globalRegion.pushOp(spv::Op::OpConstant, 3 + values.size()); - auto id = newId>>(); - region.pushIdUse(type); - region.pushIdDef(id); - for (auto value : values) { - region.pushWord(value); - } - return id; - } - - template - requires(std::is_base_of_v) - ConstantValue> createConstant32(T type, std::uint32_t value) { - return createConstant(type, std::array{value}); - } - - template - requires(std::is_base_of_v) - ConstantValue> createConstant64(T type, std::uint64_t value) { - return createConstant(type, - std::array{static_cast(value), - static_cast(value >> 32)}); - } - - // memory - VariableValue createVariable(Type type, spv::StorageClass storageClass, - std::optional initializer = {}) { - auto region = globalRegion.pushOp(spv::Op::OpVariable, - 4 + (initializer.has_value() ? 1 : 0)); - auto id = newId(); - region.pushIdUse(type); - region.pushIdDef(id); - region.pushWord(static_cast(storageClass)); - if (initializer.has_value()) { - region.pushIdUse(initializer.value()); - } - return id; - } - -private: - void createFunction(Function id, Type resultType, - spv::FunctionControlMask functionControl, - Type functionType) { - auto region = functionRegion.pushOp(spv::Op::OpFunction, 5); - region.pushIdUse(resultType); - region.pushIdDef(id); - region.pushWord(static_cast(functionControl)); - region.pushIdUse(functionType); - } - - Value createFunctionParameter(Type resultType) { - auto region = functionRegion.pushOp(spv::Op::OpFunctionParameter, 3); - auto id = newId(); - region.pushIdUse(resultType); - region.pushIdDef(id); - return id; - } - - void createFunctionEnd() { functionRegion.pushOp(spv::Op::OpFunctionEnd, 1); } - -public: - FunctionBuilder createFunctionBuilder(std::size_t expInstructionsCount) { - auto id = newId(); - return FunctionBuilder(*mIdGenerator, id, expInstructionsCount); - } - - void insertFunctionDeclaration(const FunctionBuilder &function, - Type resultType, - spv::FunctionControlMask functionControl, - Type functionType) { - createFunction(function.id, resultType, functionControl, functionType); - functionRegion.pushRegion(function.paramsRegion); - createFunctionEnd(); - } - - void insertFunction(const FunctionBuilder &function, Type resultType, - spv::FunctionControlMask functionControl, - Type functionType) { - createFunction(function.id, resultType, functionControl, functionType); - functionRegion.pushRegion(function.paramsRegion); - functionRegion.pushRegion(function.bodyRegion); - createFunctionEnd(); - } - - BlockBuilder createBlockBuilder(std::size_t expInstructionsCount) { - auto id = newId(); - - return BlockBuilder(*mIdGenerator, id, expInstructionsCount); - } -}; -} // namespace spirv diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp deleted file mode 100644 index 36dc271..0000000 --- a/hw/amdgpu/lib/libspirv/include/spirv/spirv-instruction.hpp +++ /dev/null @@ -1,2414 +0,0 @@ -#pragma once -#include "spirv.hpp" -#include -#include -#include - -namespace spirv { -enum class OperandKind { - Invalid, - ValueId, - TypeId, - Word, - String, - VariadicId, - VariadicWord, -}; - -enum class OperandDirection { - In, - Out, -}; - -enum class InstructionFlags { - None = 0, - HasResult = 1 << 0, - HasResultType = 1 << 1, -}; - -inline InstructionFlags operator|(InstructionFlags lhs, InstructionFlags rhs) { - return static_cast(static_cast(lhs) | - static_cast(rhs)); -} -inline InstructionFlags operator&(InstructionFlags lhs, InstructionFlags rhs) { - return static_cast(static_cast(lhs) & - static_cast(rhs)); -} - -struct InstructionInfo { - const char *name; - InstructionFlags flags; - OperandKind operands[16]; -}; - -inline const InstructionInfo *getInstructionInfo(spv::Op opcode) { - switch (opcode) { - default: /* unknown opcode */ - break; - case spv::Op::OpNop: { - static InstructionInfo result = {"OpNop", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpUndef: { - static InstructionInfo result = {"OpUndef", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSourceContinued: { - static InstructionInfo result = { - "OpSourceContinued", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpSource: { - static InstructionInfo result = {"OpSource", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpSourceExtension: { - static InstructionInfo result = { - "OpSourceExtension", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpName: { - static InstructionInfo result = {"OpName", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpMemberName: { - static InstructionInfo result = { - "OpMemberName", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpString: { - static InstructionInfo result = { - "OpString", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpLine: { - static InstructionInfo result = {"OpLine", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpExtension: { - static InstructionInfo result = {"OpExtension", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpExtInstImport: { - static InstructionInfo result = { - "OpExtInstImport", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpExtInst: { - static InstructionInfo result = {"OpExtInst", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpMemoryModel: { - static InstructionInfo result = { - "OpMemoryModel", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpEntryPoint: { - static InstructionInfo result = { - "OpEntryPoint", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpExecutionMode: { - static InstructionInfo result = { - "OpExecutionMode", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCapability: { - static InstructionInfo result = { - "OpCapability", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpTypeVoid: { - static InstructionInfo result = { - "OpTypeVoid", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeBool: { - static InstructionInfo result = { - "OpTypeBool", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeInt: { - static InstructionInfo result = { - "OpTypeInt", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeFloat: { - static InstructionInfo result = { - "OpTypeFloat", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeVector: { - static InstructionInfo result = { - "OpTypeVector", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeMatrix: { - static InstructionInfo result = { - "OpTypeMatrix", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeImage: { - static InstructionInfo result = { - "OpTypeImage", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeSampler: { - static InstructionInfo result = { - "OpTypeSampler", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeSampledImage: { - static InstructionInfo result = { - "OpTypeSampledImage", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeArray: { - static InstructionInfo result = { - "OpTypeArray", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeRuntimeArray: { - static InstructionInfo result = { - "OpTypeRuntimeArray", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeStruct: { - static InstructionInfo result = { - "OpTypeStruct", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeOpaque: { - static InstructionInfo result = { - "OpTypeOpaque", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypePointer: { - static InstructionInfo result = { - "OpTypePointer", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeFunction: { - static InstructionInfo result = { - "OpTypeFunction", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeEvent: { - static InstructionInfo result = { - "OpTypeEvent", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeDeviceEvent: { - static InstructionInfo result = { - "OpTypeDeviceEvent", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeReserveId: { - static InstructionInfo result = { - "OpTypeReserveId", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeQueue: { - static InstructionInfo result = { - "OpTypeQueue", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypePipe: { - static InstructionInfo result = { - "OpTypePipe", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpTypeForwardPointer: { - static InstructionInfo result = { - "OpTypeForwardPointer", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpConstantTrue: { - static InstructionInfo result = {"OpConstantTrue", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConstantFalse: { - static InstructionInfo result = {"OpConstantFalse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConstant: { - static InstructionInfo result = {"OpConstant", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConstantComposite: { - static InstructionInfo result = {"OpConstantComposite", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConstantSampler: { - static InstructionInfo result = {"OpConstantSampler", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConstantNull: { - static InstructionInfo result = {"OpConstantNull", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSpecConstantTrue: { - static InstructionInfo result = {"OpSpecConstantTrue", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSpecConstantFalse: { - static InstructionInfo result = {"OpSpecConstantFalse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSpecConstant: { - static InstructionInfo result = {"OpSpecConstant", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSpecConstantComposite: { - static InstructionInfo result = {"OpSpecConstantComposite", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSpecConstantOp: { - static InstructionInfo result = {"OpSpecConstantOp", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFunction: { - static InstructionInfo result = {"OpFunction", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFunctionParameter: { - static InstructionInfo result = {"OpFunctionParameter", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFunctionEnd: { - static InstructionInfo result = { - "OpFunctionEnd", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpFunctionCall: { - static InstructionInfo result = {"OpFunctionCall", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpVariable: { - static InstructionInfo result = {"OpVariable", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageTexelPointer: { - static InstructionInfo result = {"OpImageTexelPointer", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLoad: { - static InstructionInfo result = {"OpLoad", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpStore: { - static InstructionInfo result = {"OpStore", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCopyMemory: { - static InstructionInfo result = { - "OpCopyMemory", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCopyMemorySized: { - static InstructionInfo result = { - "OpCopyMemorySized", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpAccessChain: { - static InstructionInfo result = {"OpAccessChain", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpInBoundsAccessChain: { - static InstructionInfo result = {"OpInBoundsAccessChain", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPtrAccessChain: { - static InstructionInfo result = {"OpPtrAccessChain", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpArrayLength: { - static InstructionInfo result = {"OpArrayLength", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGenericPtrMemSemantics: { - static InstructionInfo result = {"OpGenericPtrMemSemantics", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpInBoundsPtrAccessChain: { - static InstructionInfo result = {"OpInBoundsPtrAccessChain", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDecorate: { - static InstructionInfo result = {"OpDecorate", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpMemberDecorate: { - static InstructionInfo result = { - "OpMemberDecorate", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpDecorationGroup: { - static InstructionInfo result = { - "OpDecorationGroup", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpGroupDecorate: { - static InstructionInfo result = { - "OpGroupDecorate", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGroupMemberDecorate: { - static InstructionInfo result = { - "OpGroupMemberDecorate", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpVectorExtractDynamic: { - static InstructionInfo result = {"OpVectorExtractDynamic", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpVectorInsertDynamic: { - static InstructionInfo result = {"OpVectorInsertDynamic", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpVectorShuffle: { - static InstructionInfo result = {"OpVectorShuffle", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCompositeConstruct: { - static InstructionInfo result = {"OpCompositeConstruct", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCompositeExtract: { - static InstructionInfo result = {"OpCompositeExtract", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCompositeInsert: { - static InstructionInfo result = {"OpCompositeInsert", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCopyObject: { - static InstructionInfo result = {"OpCopyObject", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpTranspose: { - static InstructionInfo result = {"OpTranspose", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSampledImage: { - static InstructionInfo result = {"OpSampledImage", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleImplicitLod: { - static InstructionInfo result = {"OpImageSampleImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleExplicitLod: { - static InstructionInfo result = {"OpImageSampleExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleDrefImplicitLod: { - static InstructionInfo result = {"OpImageSampleDrefImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleDrefExplicitLod: { - static InstructionInfo result = {"OpImageSampleDrefExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleProjImplicitLod: { - static InstructionInfo result = {"OpImageSampleProjImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleProjExplicitLod: { - static InstructionInfo result = {"OpImageSampleProjExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleProjDrefImplicitLod: { - static InstructionInfo result = {"OpImageSampleProjDrefImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSampleProjDrefExplicitLod: { - static InstructionInfo result = {"OpImageSampleProjDrefExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageFetch: { - static InstructionInfo result = {"OpImageFetch", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageGather: { - static InstructionInfo result = {"OpImageGather", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageDrefGather: { - static InstructionInfo result = {"OpImageDrefGather", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageRead: { - static InstructionInfo result = {"OpImageRead", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageWrite: { - static InstructionInfo result = { - "OpImageWrite", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpImage: { - static InstructionInfo result = {"OpImage", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQueryFormat: { - static InstructionInfo result = {"OpImageQueryFormat", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQueryOrder: { - static InstructionInfo result = {"OpImageQueryOrder", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQuerySizeLod: { - static InstructionInfo result = {"OpImageQuerySizeLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQuerySize: { - static InstructionInfo result = {"OpImageQuerySize", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQueryLod: { - static InstructionInfo result = {"OpImageQueryLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQueryLevels: { - static InstructionInfo result = {"OpImageQueryLevels", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageQuerySamples: { - static InstructionInfo result = {"OpImageQuerySamples", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertFToU: { - static InstructionInfo result = {"OpConvertFToU", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertFToS: { - static InstructionInfo result = {"OpConvertFToS", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertSToF: { - static InstructionInfo result = {"OpConvertSToF", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertUToF: { - static InstructionInfo result = {"OpConvertUToF", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUConvert: { - static InstructionInfo result = {"OpUConvert", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSConvert: { - static InstructionInfo result = {"OpSConvert", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFConvert: { - static InstructionInfo result = {"OpFConvert", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpQuantizeToF16: { - static InstructionInfo result = {"OpQuantizeToF16", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertPtrToU: { - static InstructionInfo result = {"OpConvertPtrToU", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSatConvertSToU: { - static InstructionInfo result = {"OpSatConvertSToU", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSatConvertUToS: { - static InstructionInfo result = {"OpSatConvertUToS", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpConvertUToPtr: { - static InstructionInfo result = {"OpConvertUToPtr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPtrCastToGeneric: { - static InstructionInfo result = {"OpPtrCastToGeneric", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGenericCastToPtr: { - static InstructionInfo result = {"OpGenericCastToPtr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGenericCastToPtrExplicit: { - static InstructionInfo result = {"OpGenericCastToPtrExplicit", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitcast: { - static InstructionInfo result = {"OpBitcast", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSNegate: { - static InstructionInfo result = {"OpSNegate", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFNegate: { - static InstructionInfo result = {"OpFNegate", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIAdd: { - static InstructionInfo result = {"OpIAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFAdd: { - static InstructionInfo result = {"OpFAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpISub: { - static InstructionInfo result = {"OpISub", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFSub: { - static InstructionInfo result = {"OpFSub", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIMul: { - static InstructionInfo result = {"OpIMul", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFMul: { - static InstructionInfo result = {"OpFMul", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUDiv: { - static InstructionInfo result = {"OpUDiv", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSDiv: { - static InstructionInfo result = {"OpSDiv", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFDiv: { - static InstructionInfo result = {"OpFDiv", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUMod: { - static InstructionInfo result = {"OpUMod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSRem: { - static InstructionInfo result = {"OpSRem", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSMod: { - static InstructionInfo result = {"OpSMod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFRem: { - static InstructionInfo result = {"OpFRem", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFMod: { - static InstructionInfo result = {"OpFMod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpVectorTimesScalar: { - static InstructionInfo result = {"OpVectorTimesScalar", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpMatrixTimesScalar: { - static InstructionInfo result = {"OpMatrixTimesScalar", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpVectorTimesMatrix: { - static InstructionInfo result = {"OpVectorTimesMatrix", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpMatrixTimesVector: { - static InstructionInfo result = {"OpMatrixTimesVector", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpMatrixTimesMatrix: { - static InstructionInfo result = {"OpMatrixTimesMatrix", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpOuterProduct: { - static InstructionInfo result = {"OpOuterProduct", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDot: { - static InstructionInfo result = {"OpDot", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIAddCarry: { - static InstructionInfo result = {"OpIAddCarry", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpISubBorrow: { - static InstructionInfo result = {"OpISubBorrow", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUMulExtended: { - static InstructionInfo result = {"OpUMulExtended", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSMulExtended: { - static InstructionInfo result = {"OpSMulExtended", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAny: { - static InstructionInfo result = {"OpAny", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAll: { - static InstructionInfo result = {"OpAll", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIsNan: { - static InstructionInfo result = {"OpIsNan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIsInf: { - static InstructionInfo result = {"OpIsInf", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIsFinite: { - static InstructionInfo result = {"OpIsFinite", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIsNormal: { - static InstructionInfo result = {"OpIsNormal", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSignBitSet: { - static InstructionInfo result = {"OpSignBitSet", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLessOrGreater: { - static InstructionInfo result = {"OpLessOrGreater", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpOrdered: { - static InstructionInfo result = {"OpOrdered", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUnordered: { - static InstructionInfo result = {"OpUnordered", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLogicalEqual: { - static InstructionInfo result = {"OpLogicalEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLogicalNotEqual: { - static InstructionInfo result = {"OpLogicalNotEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLogicalOr: { - static InstructionInfo result = {"OpLogicalOr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLogicalAnd: { - static InstructionInfo result = {"OpLogicalAnd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLogicalNot: { - static InstructionInfo result = {"OpLogicalNot", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSelect: { - static InstructionInfo result = {"OpSelect", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIEqual: { - static InstructionInfo result = {"OpIEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpINotEqual: { - static InstructionInfo result = {"OpINotEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUGreaterThan: { - static InstructionInfo result = {"OpUGreaterThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSGreaterThan: { - static InstructionInfo result = {"OpSGreaterThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpUGreaterThanEqual: { - static InstructionInfo result = {"OpUGreaterThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSGreaterThanEqual: { - static InstructionInfo result = {"OpSGreaterThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpULessThan: { - static InstructionInfo result = {"OpULessThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSLessThan: { - static InstructionInfo result = {"OpSLessThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpULessThanEqual: { - static InstructionInfo result = {"OpULessThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSLessThanEqual: { - static InstructionInfo result = {"OpSLessThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdEqual: { - static InstructionInfo result = {"OpFOrdEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordEqual: { - static InstructionInfo result = {"OpFUnordEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdNotEqual: { - static InstructionInfo result = {"OpFOrdNotEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordNotEqual: { - static InstructionInfo result = {"OpFUnordNotEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdLessThan: { - static InstructionInfo result = {"OpFOrdLessThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordLessThan: { - static InstructionInfo result = {"OpFUnordLessThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdGreaterThan: { - static InstructionInfo result = {"OpFOrdGreaterThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordGreaterThan: { - static InstructionInfo result = {"OpFUnordGreaterThan", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdLessThanEqual: { - static InstructionInfo result = {"OpFOrdLessThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordLessThanEqual: { - static InstructionInfo result = {"OpFUnordLessThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFOrdGreaterThanEqual: { - static InstructionInfo result = {"OpFOrdGreaterThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFUnordGreaterThanEqual: { - static InstructionInfo result = {"OpFUnordGreaterThanEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpShiftRightLogical: { - static InstructionInfo result = {"OpShiftRightLogical", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpShiftRightArithmetic: { - static InstructionInfo result = {"OpShiftRightArithmetic", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpShiftLeftLogical: { - static InstructionInfo result = {"OpShiftLeftLogical", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitwiseOr: { - static InstructionInfo result = {"OpBitwiseOr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitwiseXor: { - static InstructionInfo result = {"OpBitwiseXor", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitwiseAnd: { - static InstructionInfo result = {"OpBitwiseAnd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpNot: { - static InstructionInfo result = {"OpNot", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitFieldInsert: { - static InstructionInfo result = {"OpBitFieldInsert", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitFieldSExtract: { - static InstructionInfo result = {"OpBitFieldSExtract", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitFieldUExtract: { - static InstructionInfo result = {"OpBitFieldUExtract", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitReverse: { - static InstructionInfo result = {"OpBitReverse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBitCount: { - static InstructionInfo result = {"OpBitCount", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdx: { - static InstructionInfo result = {"OpDPdx", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdy: { - static InstructionInfo result = {"OpDPdy", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFwidth: { - static InstructionInfo result = {"OpFwidth", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdxFine: { - static InstructionInfo result = {"OpDPdxFine", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdyFine: { - static InstructionInfo result = {"OpDPdyFine", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFwidthFine: { - static InstructionInfo result = {"OpFwidthFine", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdxCoarse: { - static InstructionInfo result = {"OpDPdxCoarse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpDPdyCoarse: { - static InstructionInfo result = {"OpDPdyCoarse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpFwidthCoarse: { - static InstructionInfo result = {"OpFwidthCoarse", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpEmitVertex: { - static InstructionInfo result = { - "OpEmitVertex", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpEndPrimitive: { - static InstructionInfo result = { - "OpEndPrimitive", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpEmitStreamVertex: { - static InstructionInfo result = { - "OpEmitStreamVertex", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpEndStreamPrimitive: { - static InstructionInfo result = { - "OpEndStreamPrimitive", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpControlBarrier: { - static InstructionInfo result = { - "OpControlBarrier", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpMemoryBarrier: { - static InstructionInfo result = { - "OpMemoryBarrier", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpAtomicLoad: { - static InstructionInfo result = {"OpAtomicLoad", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicStore: { - static InstructionInfo result = { - "OpAtomicStore", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpAtomicExchange: { - static InstructionInfo result = {"OpAtomicExchange", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicCompareExchange: { - static InstructionInfo result = {"OpAtomicCompareExchange", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicCompareExchangeWeak: { - static InstructionInfo result = {"OpAtomicCompareExchangeWeak", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicIIncrement: { - static InstructionInfo result = {"OpAtomicIIncrement", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicIDecrement: { - static InstructionInfo result = {"OpAtomicIDecrement", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicIAdd: { - static InstructionInfo result = {"OpAtomicIAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicISub: { - static InstructionInfo result = {"OpAtomicISub", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicSMin: { - static InstructionInfo result = {"OpAtomicSMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicUMin: { - static InstructionInfo result = {"OpAtomicUMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicSMax: { - static InstructionInfo result = {"OpAtomicSMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicUMax: { - static InstructionInfo result = {"OpAtomicUMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicAnd: { - static InstructionInfo result = {"OpAtomicAnd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicOr: { - static InstructionInfo result = {"OpAtomicOr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicXor: { - static InstructionInfo result = {"OpAtomicXor", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPhi: { - static InstructionInfo result = {"OpPhi", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpLoopMerge: { - static InstructionInfo result = {"OpLoopMerge", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpSelectionMerge: { - static InstructionInfo result = { - "OpSelectionMerge", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpLabel: { - static InstructionInfo result = { - "OpLabel", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpBranch: { - static InstructionInfo result = {"OpBranch", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpBranchConditional: { - static InstructionInfo result = { - "OpBranchConditional", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpSwitch: { - static InstructionInfo result = {"OpSwitch", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpKill: { - static InstructionInfo result = {"OpKill", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpReturn: { - static InstructionInfo result = {"OpReturn", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpReturnValue: { - static InstructionInfo result = { - "OpReturnValue", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpUnreachable: { - static InstructionInfo result = { - "OpUnreachable", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpLifetimeStart: { - static InstructionInfo result = { - "OpLifetimeStart", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpLifetimeStop: { - static InstructionInfo result = { - "OpLifetimeStop", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGroupAsyncCopy: { - static InstructionInfo result = {"OpGroupAsyncCopy", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupWaitEvents: { - static InstructionInfo result = { - "OpGroupWaitEvents", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGroupAll: { - static InstructionInfo result = {"OpGroupAll", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupAny: { - static InstructionInfo result = {"OpGroupAny", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupBroadcast: { - static InstructionInfo result = {"OpGroupBroadcast", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupIAdd: { - static InstructionInfo result = {"OpGroupIAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupFAdd: { - static InstructionInfo result = {"OpGroupFAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupFMin: { - static InstructionInfo result = {"OpGroupFMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupUMin: { - static InstructionInfo result = {"OpGroupUMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupSMin: { - static InstructionInfo result = {"OpGroupSMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupFMax: { - static InstructionInfo result = {"OpGroupFMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupUMax: { - static InstructionInfo result = {"OpGroupUMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupSMax: { - static InstructionInfo result = {"OpGroupSMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpReadPipe: { - static InstructionInfo result = {"OpReadPipe", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpWritePipe: { - static InstructionInfo result = {"OpWritePipe", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpReservedReadPipe: { - static InstructionInfo result = {"OpReservedReadPipe", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpReservedWritePipe: { - static InstructionInfo result = {"OpReservedWritePipe", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpReserveReadPipePackets: { - static InstructionInfo result = {"OpReserveReadPipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpReserveWritePipePackets: { - static InstructionInfo result = {"OpReserveWritePipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCommitReadPipe: { - static InstructionInfo result = { - "OpCommitReadPipe", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCommitWritePipe: { - static InstructionInfo result = { - "OpCommitWritePipe", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpIsValidReserveId: { - static InstructionInfo result = {"OpIsValidReserveId", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetNumPipePackets: { - static InstructionInfo result = {"OpGetNumPipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetMaxPipePackets: { - static InstructionInfo result = {"OpGetMaxPipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupReserveReadPipePackets: { - static InstructionInfo result = {"OpGroupReserveReadPipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupReserveWritePipePackets: { - static InstructionInfo result = {"OpGroupReserveWritePipePackets", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupCommitReadPipe: { - static InstructionInfo result = { - "OpGroupCommitReadPipe", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGroupCommitWritePipe: { - static InstructionInfo result = { - "OpGroupCommitWritePipe", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpEnqueueMarker: { - static InstructionInfo result = {"OpEnqueueMarker", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpEnqueueKernel: { - static InstructionInfo result = {"OpEnqueueKernel", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelNDrangeSubGroupCount: { - static InstructionInfo result = {"OpGetKernelNDrangeSubGroupCount", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelNDrangeMaxSubGroupSize: { - static InstructionInfo result = {"OpGetKernelNDrangeMaxSubGroupSize", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelWorkGroupSize: { - static InstructionInfo result = {"OpGetKernelWorkGroupSize", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelPreferredWorkGroupSizeMultiple: { - static InstructionInfo result = { - "OpGetKernelPreferredWorkGroupSizeMultiple", - InstructionFlags::HasResult | InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpRetainEvent: { - static InstructionInfo result = { - "OpRetainEvent", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpReleaseEvent: { - static InstructionInfo result = { - "OpReleaseEvent", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCreateUserEvent: { - static InstructionInfo result = {"OpCreateUserEvent", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpIsValidEvent: { - static InstructionInfo result = {"OpIsValidEvent", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSetUserEventStatus: { - static InstructionInfo result = { - "OpSetUserEventStatus", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpCaptureEventProfilingInfo: { - static InstructionInfo result = { - "OpCaptureEventProfilingInfo", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGetDefaultQueue: { - static InstructionInfo result = {"OpGetDefaultQueue", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpBuildNDRange: { - static InstructionInfo result = {"OpBuildNDRange", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleImplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleExplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleDrefImplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleDrefImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleDrefExplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleDrefExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleProjImplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleProjImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleProjExplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleProjExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleProjDrefImplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleProjDrefImplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseSampleProjDrefExplicitLod: { - static InstructionInfo result = {"OpImageSparseSampleProjDrefExplicitLod", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseFetch: { - static InstructionInfo result = {"OpImageSparseFetch", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseGather: { - static InstructionInfo result = {"OpImageSparseGather", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseDrefGather: { - static InstructionInfo result = {"OpImageSparseDrefGather", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpImageSparseTexelsResident: { - static InstructionInfo result = {"OpImageSparseTexelsResident", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpNoLine: { - static InstructionInfo result = {"OpNoLine", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpAtomicFlagTestAndSet: { - static InstructionInfo result = {"OpAtomicFlagTestAndSet", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpAtomicFlagClear: { - static InstructionInfo result = { - "OpAtomicFlagClear", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpImageSparseRead: { - static InstructionInfo result = {"OpImageSparseRead", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpSizeOf: { - static InstructionInfo result = {"OpSizeOf", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpTypePipeStorage: { - static InstructionInfo result = { - "OpTypePipeStorage", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpConstantPipeStorage: { - static InstructionInfo result = {"OpConstantPipeStorage", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCreatePipeFromPipeStorage: { - static InstructionInfo result = {"OpCreatePipeFromPipeStorage", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelLocalSizeForSubgroupCount: { - static InstructionInfo result = {"OpGetKernelLocalSizeForSubgroupCount", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGetKernelMaxNumSubgroups: { - static InstructionInfo result = {"OpGetKernelMaxNumSubgroups", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpTypeNamedBarrier: { - static InstructionInfo result = { - "OpTypeNamedBarrier", InstructionFlags::HasResult, {}}; - return &result; - } - case spv::Op::OpNamedBarrierInitialize: { - static InstructionInfo result = {"OpNamedBarrierInitialize", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpMemoryNamedBarrier: { - static InstructionInfo result = { - "OpMemoryNamedBarrier", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpModuleProcessed: { - static InstructionInfo result = { - "OpModuleProcessed", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpExecutionModeId: { - static InstructionInfo result = { - "OpExecutionModeId", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpDecorateId: { - static InstructionInfo result = { - "OpDecorateId", InstructionFlags::None, {}}; - return &result; - } - case spv::Op::OpGroupNonUniformElect: { - static InstructionInfo result = {"OpGroupNonUniformElect", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformAll: { - static InstructionInfo result = {"OpGroupNonUniformAll", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformAny: { - static InstructionInfo result = {"OpGroupNonUniformAny", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformAllEqual: { - static InstructionInfo result = {"OpGroupNonUniformAllEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBroadcast: { - static InstructionInfo result = {"OpGroupNonUniformBroadcast", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBroadcastFirst: { - static InstructionInfo result = {"OpGroupNonUniformBroadcastFirst", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBallot: { - static InstructionInfo result = {"OpGroupNonUniformBallot", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformInverseBallot: { - static InstructionInfo result = {"OpGroupNonUniformInverseBallot", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBallotBitExtract: { - static InstructionInfo result = {"OpGroupNonUniformBallotBitExtract", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBallotBitCount: { - static InstructionInfo result = {"OpGroupNonUniformBallotBitCount", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBallotFindLSB: { - static InstructionInfo result = {"OpGroupNonUniformBallotFindLSB", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBallotFindMSB: { - static InstructionInfo result = {"OpGroupNonUniformBallotFindMSB", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformShuffle: { - static InstructionInfo result = {"OpGroupNonUniformShuffle", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformShuffleXor: { - static InstructionInfo result = {"OpGroupNonUniformShuffleXor", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformShuffleUp: { - static InstructionInfo result = {"OpGroupNonUniformShuffleUp", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformShuffleDown: { - static InstructionInfo result = {"OpGroupNonUniformShuffleDown", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformIAdd: { - static InstructionInfo result = {"OpGroupNonUniformIAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformFAdd: { - static InstructionInfo result = {"OpGroupNonUniformFAdd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformIMul: { - static InstructionInfo result = {"OpGroupNonUniformIMul", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformFMul: { - static InstructionInfo result = {"OpGroupNonUniformFMul", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformSMin: { - static InstructionInfo result = {"OpGroupNonUniformSMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformUMin: { - static InstructionInfo result = {"OpGroupNonUniformUMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformFMin: { - static InstructionInfo result = {"OpGroupNonUniformFMin", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformSMax: { - static InstructionInfo result = {"OpGroupNonUniformSMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformUMax: { - static InstructionInfo result = {"OpGroupNonUniformUMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformFMax: { - static InstructionInfo result = {"OpGroupNonUniformFMax", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBitwiseAnd: { - static InstructionInfo result = {"OpGroupNonUniformBitwiseAnd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBitwiseOr: { - static InstructionInfo result = {"OpGroupNonUniformBitwiseOr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformBitwiseXor: { - static InstructionInfo result = {"OpGroupNonUniformBitwiseXor", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformLogicalAnd: { - static InstructionInfo result = {"OpGroupNonUniformLogicalAnd", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformLogicalOr: { - static InstructionInfo result = {"OpGroupNonUniformLogicalOr", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformLogicalXor: { - static InstructionInfo result = {"OpGroupNonUniformLogicalXor", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformQuadBroadcast: { - static InstructionInfo result = {"OpGroupNonUniformQuadBroadcast", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpGroupNonUniformQuadSwap: { - static InstructionInfo result = {"OpGroupNonUniformQuadSwap", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpCopyLogical: { - static InstructionInfo result = {"OpCopyLogical", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPtrEqual: { - static InstructionInfo result = {"OpPtrEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPtrNotEqual: { - static InstructionInfo result = {"OpPtrNotEqual", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - case spv::Op::OpPtrDiff: { - static InstructionInfo result = {"OpPtrDiff", - InstructionFlags::HasResult | - InstructionFlags::HasResultType, - {}}; - return &result; - } - } - - return nullptr; -} - -inline void dump(std::span range, - void (*printId)(std::uint32_t id) = nullptr) { - if (printId == nullptr) { - printId = [](uint32_t id) { std::printf("%%%u", id); }; - } - - while (!range.empty()) { - auto opWordCount = range[0]; - auto op = static_cast(opWordCount & spv::OpCodeMask); - auto wordCount = opWordCount >> spv::WordCountShift; - - if (range.size() < wordCount || wordCount == 0) { - std::printf("\n"); - - for (auto word : range) { - std::printf("%08x ", (unsigned)word); - } - - std::printf("\n"); - - break; - } - - auto info = getInstructionInfo(op); - - if (info == nullptr) { - std::printf("unknown instruction\n"); - range = range.subspan(wordCount); - continue; - } - - auto word = range.data() + 1; - auto wordEnd = range.data() + wordCount; - bool isFirst = true; - - if ((info->flags & InstructionFlags::HasResult) == - InstructionFlags::HasResult) { - std::uint32_t outputTypeId = 0; - - if ((info->flags & InstructionFlags::HasResultType) == - InstructionFlags::HasResultType) { - if (word < wordEnd) { - outputTypeId = *word++; - } - } - - std::uint32_t outputId = word < wordEnd ? *word++ : 0; - - printId(outputId); - if ((info->flags & InstructionFlags::HasResultType) == - InstructionFlags::HasResultType) { - std::printf(": "); - printId(outputTypeId); - } - - std::printf(" = "); - } - - std::printf("%s(", info->name); - - for (auto &op : std::span(info->operands)) { - if (op == OperandKind::Invalid) { - break; - } - - if (word >= wordEnd) { - if (op == OperandKind::VariadicWord || op == OperandKind::VariadicId) { - break; - } - - std::printf("\n"); - break; - } - - auto currentWord = *word++; - - if (isFirst) { - isFirst = false; - } else { - std::printf(", "); - } - - if (op == OperandKind::VariadicId || op == OperandKind::TypeId || - op == OperandKind::ValueId) { - printId(currentWord); - } else if (op == OperandKind::Word || op == OperandKind::VariadicWord) { - std::printf("%u", currentWord); - } else if (op == OperandKind::String) { - bool foundEnd = false; - while (true) { - if (reinterpret_cast(currentWord)[3] == '\0') { - foundEnd = true; - break; - } - - if (word >= wordEnd) { - break; - } - - currentWord = *word++; - } - - if (foundEnd) { - std::printf("'%s'", reinterpret_cast(word - 1)); - } else { - std::printf(""); - } - } else { - std::printf(""); - } - } - - while (word < wordEnd) { - if (isFirst) { - isFirst = false; - } else { - std::printf(", "); - } - - auto currentWord = *word++; - - std::printf("%u", currentWord); - } - - std::printf(")\n"); - range = range.subspan(wordCount); - } -} -} // namespace spirv \ No newline at end of file diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp deleted file mode 100644 index 242b2e0..0000000 --- a/hw/amdgpu/lib/libspirv/include/spirv/spirv.hpp +++ /dev/null @@ -1,4968 +0,0 @@ -// Copyright (c) 2014-2020 The Khronos Group Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. - -// This header is automatically generated by the same tool that creates -// the Binary Section of the SPIR-V specification. - -// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python, C#, D, Beef -// -// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -// - C++11 will use enum classes in the spv namespace, e.g.: -// spv::SourceLanguage::GLSL -// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -// - C# will use enum classes in the Specification class located in the "Spv" -// namespace, -// e.g.: Spv.Specification.SourceLanguage.GLSL -// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL -// - Beef will use enum classes in the Specification class located in the "Spv" -// namespace, -// e.g.: Spv.Specification.SourceLanguage.GLSL -// -// Some tokens act like mask values, which can be OR'd together, -// while others are mutually exclusive. The mask-like ones have -// "Mask" in their name, and a parallel enum that has the shift -// amount (1 << x) for each corresponding enumerant. - -#ifndef spirv_HPP -#define spirv_HPP - -namespace spv { - -typedef unsigned int Id; - -#define SPV_VERSION 0x10600 -#define SPV_REVISION 1 - -static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010600; -static const unsigned int Revision = 1; -static const unsigned int OpCodeMask = 0xffff; -static const unsigned int WordCountShift = 16; - -enum class SourceLanguage : unsigned { - Unknown = 0, - ESSL = 1, - GLSL = 2, - OpenCL_C = 3, - OpenCL_CPP = 4, - HLSL = 5, - CPP_for_OpenCL = 6, - SYCL = 7, - HERO_C = 8, - Max = 0x7fffffff, -}; - -enum class ExecutionModel : unsigned { - Vertex = 0, - TessellationControl = 1, - TessellationEvaluation = 2, - Geometry = 3, - Fragment = 4, - GLCompute = 5, - Kernel = 6, - TaskNV = 5267, - MeshNV = 5268, - RayGenerationKHR = 5313, - RayGenerationNV = 5313, - IntersectionKHR = 5314, - IntersectionNV = 5314, - AnyHitKHR = 5315, - AnyHitNV = 5315, - ClosestHitKHR = 5316, - ClosestHitNV = 5316, - MissKHR = 5317, - MissNV = 5317, - CallableKHR = 5318, - CallableNV = 5318, - TaskEXT = 5364, - MeshEXT = 5365, - Max = 0x7fffffff, -}; - -enum class AddressingModel : unsigned { - Logical = 0, - Physical32 = 1, - Physical64 = 2, - PhysicalStorageBuffer64 = 5348, - PhysicalStorageBuffer64EXT = 5348, - Max = 0x7fffffff, -}; - -enum class MemoryModel : unsigned { - Simple = 0, - GLSL450 = 1, - OpenCL = 2, - Vulkan = 3, - VulkanKHR = 3, - Max = 0x7fffffff, -}; - -enum class ExecutionMode : unsigned { - Invocations = 0, - SpacingEqual = 1, - SpacingFractionalEven = 2, - SpacingFractionalOdd = 3, - VertexOrderCw = 4, - VertexOrderCcw = 5, - PixelCenterInteger = 6, - OriginUpperLeft = 7, - OriginLowerLeft = 8, - EarlyFragmentTests = 9, - PointMode = 10, - Xfb = 11, - DepthReplacing = 12, - DepthGreater = 14, - DepthLess = 15, - DepthUnchanged = 16, - LocalSize = 17, - LocalSizeHint = 18, - InputPoints = 19, - InputLines = 20, - InputLinesAdjacency = 21, - Triangles = 22, - InputTrianglesAdjacency = 23, - Quads = 24, - Isolines = 25, - OutputVertices = 26, - OutputPoints = 27, - OutputLineStrip = 28, - OutputTriangleStrip = 29, - VecTypeHint = 30, - ContractionOff = 31, - Initializer = 33, - Finalizer = 34, - SubgroupSize = 35, - SubgroupsPerWorkgroup = 36, - SubgroupsPerWorkgroupId = 37, - LocalSizeId = 38, - LocalSizeHintId = 39, - NonCoherentColorAttachmentReadEXT = 4169, - NonCoherentDepthAttachmentReadEXT = 4170, - NonCoherentStencilAttachmentReadEXT = 4171, - SubgroupUniformControlFlowKHR = 4421, - PostDepthCoverage = 4446, - DenormPreserve = 4459, - DenormFlushToZero = 4460, - SignedZeroInfNanPreserve = 4461, - RoundingModeRTE = 4462, - RoundingModeRTZ = 4463, - EarlyAndLateFragmentTestsAMD = 5017, - StencilRefReplacingEXT = 5027, - StencilRefUnchangedFrontAMD = 5079, - StencilRefGreaterFrontAMD = 5080, - StencilRefLessFrontAMD = 5081, - StencilRefUnchangedBackAMD = 5082, - StencilRefGreaterBackAMD = 5083, - StencilRefLessBackAMD = 5084, - OutputLinesEXT = 5269, - OutputLinesNV = 5269, - OutputPrimitivesEXT = 5270, - OutputPrimitivesNV = 5270, - DerivativeGroupQuadsNV = 5289, - DerivativeGroupLinearNV = 5290, - OutputTrianglesEXT = 5298, - OutputTrianglesNV = 5298, - PixelInterlockOrderedEXT = 5366, - PixelInterlockUnorderedEXT = 5367, - SampleInterlockOrderedEXT = 5368, - SampleInterlockUnorderedEXT = 5369, - ShadingRateInterlockOrderedEXT = 5370, - ShadingRateInterlockUnorderedEXT = 5371, - SharedLocalMemorySizeINTEL = 5618, - RoundingModeRTPINTEL = 5620, - RoundingModeRTNINTEL = 5621, - FloatingPointModeALTINTEL = 5622, - FloatingPointModeIEEEINTEL = 5623, - MaxWorkgroupSizeINTEL = 5893, - MaxWorkDimINTEL = 5894, - NoGlobalOffsetINTEL = 5895, - NumSIMDWorkitemsINTEL = 5896, - SchedulerTargetFmaxMhzINTEL = 5903, - StreamingInterfaceINTEL = 6154, - RegisterMapInterfaceINTEL = 6160, - NamedBarrierCountINTEL = 6417, - Max = 0x7fffffff, -}; - -enum class StorageClass : unsigned { - UniformConstant = 0, - Input = 1, - Uniform = 2, - Output = 3, - Workgroup = 4, - CrossWorkgroup = 5, - Private = 6, - Function = 7, - Generic = 8, - PushConstant = 9, - AtomicCounter = 10, - Image = 11, - StorageBuffer = 12, - TileImageEXT = 4172, - CallableDataKHR = 5328, - CallableDataNV = 5328, - IncomingCallableDataKHR = 5329, - IncomingCallableDataNV = 5329, - RayPayloadKHR = 5338, - RayPayloadNV = 5338, - HitAttributeKHR = 5339, - HitAttributeNV = 5339, - IncomingRayPayloadKHR = 5342, - IncomingRayPayloadNV = 5342, - ShaderRecordBufferKHR = 5343, - ShaderRecordBufferNV = 5343, - PhysicalStorageBuffer = 5349, - PhysicalStorageBufferEXT = 5349, - HitObjectAttributeNV = 5385, - TaskPayloadWorkgroupEXT = 5402, - CodeSectionINTEL = 5605, - DeviceOnlyINTEL = 5936, - HostOnlyINTEL = 5937, - Max = 0x7fffffff, -}; - -enum class Dim : unsigned { - Dim1D = 0, - Dim2D = 1, - Dim3D = 2, - Cube = 3, - Rect = 4, - Buffer = 5, - SubpassData = 6, - TileImageDataEXT = 4173, - Max = 0x7fffffff, -}; - -enum class SamplerAddressingMode : unsigned { - None = 0, - ClampToEdge = 1, - Clamp = 2, - Repeat = 3, - RepeatMirrored = 4, - Max = 0x7fffffff, -}; - -enum class SamplerFilterMode : unsigned { - Nearest = 0, - Linear = 1, - Max = 0x7fffffff, -}; - -enum class ImageFormat : unsigned { - Unknown = 0, - Rgba32f = 1, - Rgba16f = 2, - R32f = 3, - Rgba8 = 4, - Rgba8Snorm = 5, - Rg32f = 6, - Rg16f = 7, - R11fG11fB10f = 8, - R16f = 9, - Rgba16 = 10, - Rgb10A2 = 11, - Rg16 = 12, - Rg8 = 13, - R16 = 14, - R8 = 15, - Rgba16Snorm = 16, - Rg16Snorm = 17, - Rg8Snorm = 18, - R16Snorm = 19, - R8Snorm = 20, - Rgba32i = 21, - Rgba16i = 22, - Rgba8i = 23, - R32i = 24, - Rg32i = 25, - Rg16i = 26, - Rg8i = 27, - R16i = 28, - R8i = 29, - Rgba32ui = 30, - Rgba16ui = 31, - Rgba8ui = 32, - R32ui = 33, - Rgb10a2ui = 34, - Rg32ui = 35, - Rg16ui = 36, - Rg8ui = 37, - R16ui = 38, - R8ui = 39, - R64ui = 40, - R64i = 41, - Max = 0x7fffffff, -}; - -enum class ImageChannelOrder : unsigned { - R = 0, - A = 1, - RG = 2, - RA = 3, - RGB = 4, - RGBA = 5, - BGRA = 6, - ARGB = 7, - Intensity = 8, - Luminance = 9, - Rx = 10, - RGx = 11, - RGBx = 12, - Depth = 13, - DepthStencil = 14, - sRGB = 15, - sRGBx = 16, - sRGBA = 17, - sBGRA = 18, - ABGR = 19, - Max = 0x7fffffff, -}; - -enum class ImageChannelDataType : unsigned { - SnormInt8 = 0, - SnormInt16 = 1, - UnormInt8 = 2, - UnormInt16 = 3, - UnormShort565 = 4, - UnormShort555 = 5, - UnormInt101010 = 6, - SignedInt8 = 7, - SignedInt16 = 8, - SignedInt32 = 9, - UnsignedInt8 = 10, - UnsignedInt16 = 11, - UnsignedInt32 = 12, - HalfFloat = 13, - Float = 14, - UnormInt24 = 15, - UnormInt101010_2 = 16, - Max = 0x7fffffff, -}; - -enum class ImageOperandsShift : unsigned { - Bias = 0, - Lod = 1, - Grad = 2, - ConstOffset = 3, - Offset = 4, - ConstOffsets = 5, - Sample = 6, - MinLod = 7, - MakeTexelAvailable = 8, - MakeTexelAvailableKHR = 8, - MakeTexelVisible = 9, - MakeTexelVisibleKHR = 9, - NonPrivateTexel = 10, - NonPrivateTexelKHR = 10, - VolatileTexel = 11, - VolatileTexelKHR = 11, - SignExtend = 12, - ZeroExtend = 13, - Nontemporal = 14, - Offsets = 16, - Max = 0x7fffffff, -}; - -enum class ImageOperandsMask : unsigned { - MaskNone = 0, - Bias = 0x00000001, - Lod = 0x00000002, - Grad = 0x00000004, - ConstOffset = 0x00000008, - Offset = 0x00000010, - ConstOffsets = 0x00000020, - Sample = 0x00000040, - MinLod = 0x00000080, - MakeTexelAvailable = 0x00000100, - MakeTexelAvailableKHR = 0x00000100, - MakeTexelVisible = 0x00000200, - MakeTexelVisibleKHR = 0x00000200, - NonPrivateTexel = 0x00000400, - NonPrivateTexelKHR = 0x00000400, - VolatileTexel = 0x00000800, - VolatileTexelKHR = 0x00000800, - SignExtend = 0x00001000, - ZeroExtend = 0x00002000, - Nontemporal = 0x00004000, - Offsets = 0x00010000, -}; - -enum class FPFastMathModeShift : unsigned { - NotNaN = 0, - NotInf = 1, - NSZ = 2, - AllowRecip = 3, - Fast = 4, - AllowContractFastINTEL = 16, - AllowReassocINTEL = 17, - Max = 0x7fffffff, -}; - -enum class FPFastMathModeMask : unsigned { - MaskNone = 0, - NotNaN = 0x00000001, - NotInf = 0x00000002, - NSZ = 0x00000004, - AllowRecip = 0x00000008, - Fast = 0x00000010, - AllowContractFastINTEL = 0x00010000, - AllowReassocINTEL = 0x00020000, -}; - -enum class FPRoundingMode : unsigned { - RTE = 0, - RTZ = 1, - RTP = 2, - RTN = 3, - Max = 0x7fffffff, -}; - -enum class LinkageType : unsigned { - Export = 0, - Import = 1, - LinkOnceODR = 2, - Max = 0x7fffffff, -}; - -enum class AccessQualifier : unsigned { - ReadOnly = 0, - WriteOnly = 1, - ReadWrite = 2, - Max = 0x7fffffff, -}; - -enum class FunctionParameterAttribute : unsigned { - Zext = 0, - Sext = 1, - ByVal = 2, - Sret = 3, - NoAlias = 4, - NoCapture = 5, - NoWrite = 6, - NoReadWrite = 7, - RuntimeAlignedINTEL = 5940, - Max = 0x7fffffff, -}; - -enum class Decoration : unsigned { - RelaxedPrecision = 0, - SpecId = 1, - Block = 2, - BufferBlock = 3, - RowMajor = 4, - ColMajor = 5, - ArrayStride = 6, - MatrixStride = 7, - GLSLShared = 8, - GLSLPacked = 9, - CPacked = 10, - BuiltIn = 11, - NoPerspective = 13, - Flat = 14, - Patch = 15, - Centroid = 16, - Sample = 17, - Invariant = 18, - Restrict = 19, - Aliased = 20, - Volatile = 21, - Constant = 22, - Coherent = 23, - NonWritable = 24, - NonReadable = 25, - Uniform = 26, - UniformId = 27, - SaturatedConversion = 28, - Stream = 29, - Location = 30, - Component = 31, - Index = 32, - Binding = 33, - DescriptorSet = 34, - Offset = 35, - XfbBuffer = 36, - XfbStride = 37, - FuncParamAttr = 38, - FPRoundingMode = 39, - FPFastMathMode = 40, - LinkageAttributes = 41, - NoContraction = 42, - InputAttachmentIndex = 43, - Alignment = 44, - MaxByteOffset = 45, - AlignmentId = 46, - MaxByteOffsetId = 47, - NoSignedWrap = 4469, - NoUnsignedWrap = 4470, - WeightTextureQCOM = 4487, - BlockMatchTextureQCOM = 4488, - ExplicitInterpAMD = 4999, - OverrideCoverageNV = 5248, - PassthroughNV = 5250, - ViewportRelativeNV = 5252, - SecondaryViewportRelativeNV = 5256, - PerPrimitiveEXT = 5271, - PerPrimitiveNV = 5271, - PerViewNV = 5272, - PerTaskNV = 5273, - PerVertexKHR = 5285, - PerVertexNV = 5285, - NonUniform = 5300, - NonUniformEXT = 5300, - RestrictPointer = 5355, - RestrictPointerEXT = 5355, - AliasedPointer = 5356, - AliasedPointerEXT = 5356, - HitObjectShaderRecordBufferNV = 5386, - BindlessSamplerNV = 5398, - BindlessImageNV = 5399, - BoundSamplerNV = 5400, - BoundImageNV = 5401, - SIMTCallINTEL = 5599, - ReferencedIndirectlyINTEL = 5602, - ClobberINTEL = 5607, - SideEffectsINTEL = 5608, - VectorComputeVariableINTEL = 5624, - FuncParamIOKindINTEL = 5625, - VectorComputeFunctionINTEL = 5626, - StackCallINTEL = 5627, - GlobalVariableOffsetINTEL = 5628, - CounterBuffer = 5634, - HlslCounterBufferGOOGLE = 5634, - HlslSemanticGOOGLE = 5635, - UserSemantic = 5635, - UserTypeGOOGLE = 5636, - FunctionRoundingModeINTEL = 5822, - FunctionDenormModeINTEL = 5823, - RegisterINTEL = 5825, - MemoryINTEL = 5826, - NumbanksINTEL = 5827, - BankwidthINTEL = 5828, - MaxPrivateCopiesINTEL = 5829, - SinglepumpINTEL = 5830, - DoublepumpINTEL = 5831, - MaxReplicatesINTEL = 5832, - SimpleDualPortINTEL = 5833, - MergeINTEL = 5834, - BankBitsINTEL = 5835, - ForcePow2DepthINTEL = 5836, - BurstCoalesceINTEL = 5899, - CacheSizeINTEL = 5900, - DontStaticallyCoalesceINTEL = 5901, - PrefetchINTEL = 5902, - StallEnableINTEL = 5905, - FuseLoopsInFunctionINTEL = 5907, - MathOpDSPModeINTEL = 5909, - AliasScopeINTEL = 5914, - NoAliasINTEL = 5915, - InitiationIntervalINTEL = 5917, - MaxConcurrencyINTEL = 5918, - PipelineEnableINTEL = 5919, - BufferLocationINTEL = 5921, - IOPipeStorageINTEL = 5944, - FunctionFloatingPointModeINTEL = 6080, - SingleElementVectorINTEL = 6085, - VectorComputeCallableFunctionINTEL = 6087, - MediaBlockIOINTEL = 6140, - LatencyControlLabelINTEL = 6172, - LatencyControlConstraintINTEL = 6173, - ConduitKernelArgumentINTEL = 6175, - RegisterMapKernelArgumentINTEL = 6176, - MMHostInterfaceAddressWidthINTEL = 6177, - MMHostInterfaceDataWidthINTEL = 6178, - MMHostInterfaceLatencyINTEL = 6179, - MMHostInterfaceReadWriteModeINTEL = 6180, - MMHostInterfaceMaxBurstINTEL = 6181, - MMHostInterfaceWaitRequestINTEL = 6182, - StableKernelArgumentINTEL = 6183, - Max = 0x7fffffff, -}; - -enum class BuiltIn : unsigned { - Position = 0, - PointSize = 1, - ClipDistance = 3, - CullDistance = 4, - VertexId = 5, - InstanceId = 6, - PrimitiveId = 7, - InvocationId = 8, - Layer = 9, - ViewportIndex = 10, - TessLevelOuter = 11, - TessLevelInner = 12, - TessCoord = 13, - PatchVertices = 14, - FragCoord = 15, - PointCoord = 16, - FrontFacing = 17, - SampleId = 18, - SamplePosition = 19, - SampleMask = 20, - FragDepth = 22, - HelperInvocation = 23, - NumWorkgroups = 24, - WorkgroupSize = 25, - WorkgroupId = 26, - LocalInvocationId = 27, - GlobalInvocationId = 28, - LocalInvocationIndex = 29, - WorkDim = 30, - GlobalSize = 31, - EnqueuedWorkgroupSize = 32, - GlobalOffset = 33, - GlobalLinearId = 34, - SubgroupSize = 36, - SubgroupMaxSize = 37, - NumSubgroups = 38, - NumEnqueuedSubgroups = 39, - SubgroupId = 40, - SubgroupLocalInvocationId = 41, - VertexIndex = 42, - InstanceIndex = 43, - CoreIDARM = 4160, - CoreCountARM = 4161, - CoreMaxIDARM = 4162, - WarpIDARM = 4163, - WarpMaxIDARM = 4164, - SubgroupEqMask = 4416, - SubgroupEqMaskKHR = 4416, - SubgroupGeMask = 4417, - SubgroupGeMaskKHR = 4417, - SubgroupGtMask = 4418, - SubgroupGtMaskKHR = 4418, - SubgroupLeMask = 4419, - SubgroupLeMaskKHR = 4419, - SubgroupLtMask = 4420, - SubgroupLtMaskKHR = 4420, - BaseVertex = 4424, - BaseInstance = 4425, - DrawIndex = 4426, - PrimitiveShadingRateKHR = 4432, - DeviceIndex = 4438, - ViewIndex = 4440, - ShadingRateKHR = 4444, - BaryCoordNoPerspAMD = 4992, - BaryCoordNoPerspCentroidAMD = 4993, - BaryCoordNoPerspSampleAMD = 4994, - BaryCoordSmoothAMD = 4995, - BaryCoordSmoothCentroidAMD = 4996, - BaryCoordSmoothSampleAMD = 4997, - BaryCoordPullModelAMD = 4998, - FragStencilRefEXT = 5014, - ViewportMaskNV = 5253, - SecondaryPositionNV = 5257, - SecondaryViewportMaskNV = 5258, - PositionPerViewNV = 5261, - ViewportMaskPerViewNV = 5262, - FullyCoveredEXT = 5264, - TaskCountNV = 5274, - PrimitiveCountNV = 5275, - PrimitiveIndicesNV = 5276, - ClipDistancePerViewNV = 5277, - CullDistancePerViewNV = 5278, - LayerPerViewNV = 5279, - MeshViewCountNV = 5280, - MeshViewIndicesNV = 5281, - BaryCoordKHR = 5286, - BaryCoordNV = 5286, - BaryCoordNoPerspKHR = 5287, - BaryCoordNoPerspNV = 5287, - FragSizeEXT = 5292, - FragmentSizeNV = 5292, - FragInvocationCountEXT = 5293, - InvocationsPerPixelNV = 5293, - PrimitivePointIndicesEXT = 5294, - PrimitiveLineIndicesEXT = 5295, - PrimitiveTriangleIndicesEXT = 5296, - CullPrimitiveEXT = 5299, - LaunchIdKHR = 5319, - LaunchIdNV = 5319, - LaunchSizeKHR = 5320, - LaunchSizeNV = 5320, - WorldRayOriginKHR = 5321, - WorldRayOriginNV = 5321, - WorldRayDirectionKHR = 5322, - WorldRayDirectionNV = 5322, - ObjectRayOriginKHR = 5323, - ObjectRayOriginNV = 5323, - ObjectRayDirectionKHR = 5324, - ObjectRayDirectionNV = 5324, - RayTminKHR = 5325, - RayTminNV = 5325, - RayTmaxKHR = 5326, - RayTmaxNV = 5326, - InstanceCustomIndexKHR = 5327, - InstanceCustomIndexNV = 5327, - ObjectToWorldKHR = 5330, - ObjectToWorldNV = 5330, - WorldToObjectKHR = 5331, - WorldToObjectNV = 5331, - HitTNV = 5332, - HitKindKHR = 5333, - HitKindNV = 5333, - CurrentRayTimeNV = 5334, - HitTriangleVertexPositionsKHR = 5335, - IncomingRayFlagsKHR = 5351, - IncomingRayFlagsNV = 5351, - RayGeometryIndexKHR = 5352, - WarpsPerSMNV = 5374, - SMCountNV = 5375, - WarpIDNV = 5376, - SMIDNV = 5377, - CullMaskKHR = 6021, - Max = 0x7fffffff, -}; - -enum class SelectionControlShift : unsigned { - Flatten = 0, - DontFlatten = 1, - Max = 0x7fffffff, -}; - -enum class SelectionControlMask : unsigned { - MaskNone = 0, - Flatten = 0x00000001, - DontFlatten = 0x00000002, -}; - -enum class LoopControlShift : unsigned { - Unroll = 0, - DontUnroll = 1, - DependencyInfinite = 2, - DependencyLength = 3, - MinIterations = 4, - MaxIterations = 5, - IterationMultiple = 6, - PeelCount = 7, - PartialCount = 8, - InitiationIntervalINTEL = 16, - MaxConcurrencyINTEL = 17, - DependencyArrayINTEL = 18, - PipelineEnableINTEL = 19, - LoopCoalesceINTEL = 20, - MaxInterleavingINTEL = 21, - SpeculatedIterationsINTEL = 22, - NoFusionINTEL = 23, - LoopCountINTEL = 24, - MaxReinvocationDelayINTEL = 25, - Max = 0x7fffffff, -}; - -enum class LoopControlMask : unsigned { - MaskNone = 0, - Unroll = 0x00000001, - DontUnroll = 0x00000002, - DependencyInfinite = 0x00000004, - DependencyLength = 0x00000008, - MinIterations = 0x00000010, - MaxIterations = 0x00000020, - IterationMultiple = 0x00000040, - PeelCount = 0x00000080, - PartialCount = 0x00000100, - InitiationIntervalINTEL = 0x00010000, - MaxConcurrencyINTEL = 0x00020000, - DependencyArrayINTEL = 0x00040000, - PipelineEnableINTEL = 0x00080000, - LoopCoalesceINTEL = 0x00100000, - MaxInterleavingINTEL = 0x00200000, - SpeculatedIterationsINTEL = 0x00400000, - NoFusionINTEL = 0x00800000, - LoopCountINTEL = 0x01000000, - MaxReinvocationDelayINTEL = 0x02000000, -}; - -enum class FunctionControlShift : unsigned { - Inline = 0, - DontInline = 1, - Pure = 2, - Const = 3, - OptNoneINTEL = 16, - Max = 0x7fffffff, -}; - -enum class FunctionControlMask : unsigned { - MaskNone = 0, - Inline = 0x00000001, - DontInline = 0x00000002, - Pure = 0x00000004, - Const = 0x00000008, - OptNoneINTEL = 0x00010000, -}; - -enum class MemorySemanticsShift : unsigned { - Acquire = 1, - Release = 2, - AcquireRelease = 3, - SequentiallyConsistent = 4, - UniformMemory = 6, - SubgroupMemory = 7, - WorkgroupMemory = 8, - CrossWorkgroupMemory = 9, - AtomicCounterMemory = 10, - ImageMemory = 11, - OutputMemory = 12, - OutputMemoryKHR = 12, - MakeAvailable = 13, - MakeAvailableKHR = 13, - MakeVisible = 14, - MakeVisibleKHR = 14, - Volatile = 15, - Max = 0x7fffffff, -}; - -enum class MemorySemanticsMask : unsigned { - MaskNone = 0, - Acquire = 0x00000002, - Release = 0x00000004, - AcquireRelease = 0x00000008, - SequentiallyConsistent = 0x00000010, - UniformMemory = 0x00000040, - SubgroupMemory = 0x00000080, - WorkgroupMemory = 0x00000100, - CrossWorkgroupMemory = 0x00000200, - AtomicCounterMemory = 0x00000400, - ImageMemory = 0x00000800, - OutputMemory = 0x00001000, - OutputMemoryKHR = 0x00001000, - MakeAvailable = 0x00002000, - MakeAvailableKHR = 0x00002000, - MakeVisible = 0x00004000, - MakeVisibleKHR = 0x00004000, - Volatile = 0x00008000, -}; - -enum class MemoryAccessShift : unsigned { - Volatile = 0, - Aligned = 1, - Nontemporal = 2, - MakePointerAvailable = 3, - MakePointerAvailableKHR = 3, - MakePointerVisible = 4, - MakePointerVisibleKHR = 4, - NonPrivatePointer = 5, - NonPrivatePointerKHR = 5, - AliasScopeINTELMask = 16, - NoAliasINTELMask = 17, - Max = 0x7fffffff, -}; - -enum class MemoryAccessMask : unsigned { - MaskNone = 0, - Volatile = 0x00000001, - Aligned = 0x00000002, - Nontemporal = 0x00000004, - MakePointerAvailable = 0x00000008, - MakePointerAvailableKHR = 0x00000008, - MakePointerVisible = 0x00000010, - MakePointerVisibleKHR = 0x00000010, - NonPrivatePointer = 0x00000020, - NonPrivatePointerKHR = 0x00000020, - AliasScopeINTELMask = 0x00010000, - NoAliasINTELMask = 0x00020000, -}; - -enum class Scope : unsigned { - CrossDevice = 0, - Device = 1, - Workgroup = 2, - Subgroup = 3, - Invocation = 4, - QueueFamily = 5, - QueueFamilyKHR = 5, - ShaderCallKHR = 6, - Max = 0x7fffffff, -}; - -enum class GroupOperation : unsigned { - Reduce = 0, - InclusiveScan = 1, - ExclusiveScan = 2, - ClusteredReduce = 3, - PartitionedReduceNV = 6, - PartitionedInclusiveScanNV = 7, - PartitionedExclusiveScanNV = 8, - Max = 0x7fffffff, -}; - -enum class KernelEnqueueFlags : unsigned { - NoWait = 0, - WaitKernel = 1, - WaitWorkGroup = 2, - Max = 0x7fffffff, -}; - -enum class KernelProfilingInfoShift : unsigned { - CmdExecTime = 0, - Max = 0x7fffffff, -}; - -enum class KernelProfilingInfoMask : unsigned { - MaskNone = 0, - CmdExecTime = 0x00000001, -}; - -enum class Capability : unsigned { - Matrix = 0, - Shader = 1, - Geometry = 2, - Tessellation = 3, - Addresses = 4, - Linkage = 5, - Kernel = 6, - Vector16 = 7, - Float16Buffer = 8, - Float16 = 9, - Float64 = 10, - Int64 = 11, - Int64Atomics = 12, - ImageBasic = 13, - ImageReadWrite = 14, - ImageMipmap = 15, - Pipes = 17, - Groups = 18, - DeviceEnqueue = 19, - LiteralSampler = 20, - AtomicStorage = 21, - Int16 = 22, - TessellationPointSize = 23, - GeometryPointSize = 24, - ImageGatherExtended = 25, - StorageImageMultisample = 27, - UniformBufferArrayDynamicIndexing = 28, - SampledImageArrayDynamicIndexing = 29, - StorageBufferArrayDynamicIndexing = 30, - StorageImageArrayDynamicIndexing = 31, - ClipDistance = 32, - CullDistance = 33, - ImageCubeArray = 34, - SampleRateShading = 35, - ImageRect = 36, - SampledRect = 37, - GenericPointer = 38, - Int8 = 39, - InputAttachment = 40, - SparseResidency = 41, - MinLod = 42, - Sampled1D = 43, - Image1D = 44, - SampledCubeArray = 45, - SampledBuffer = 46, - ImageBuffer = 47, - ImageMSArray = 48, - StorageImageExtendedFormats = 49, - ImageQuery = 50, - DerivativeControl = 51, - InterpolationFunction = 52, - TransformFeedback = 53, - GeometryStreams = 54, - StorageImageReadWithoutFormat = 55, - StorageImageWriteWithoutFormat = 56, - MultiViewport = 57, - SubgroupDispatch = 58, - NamedBarrier = 59, - PipeStorage = 60, - GroupNonUniform = 61, - GroupNonUniformVote = 62, - GroupNonUniformArithmetic = 63, - GroupNonUniformBallot = 64, - GroupNonUniformShuffle = 65, - GroupNonUniformShuffleRelative = 66, - GroupNonUniformClustered = 67, - GroupNonUniformQuad = 68, - ShaderLayer = 69, - ShaderViewportIndex = 70, - UniformDecoration = 71, - CoreBuiltinsARM = 4165, - TileImageColorReadAccessEXT = 4166, - TileImageDepthReadAccessEXT = 4167, - TileImageStencilReadAccessEXT = 4168, - FragmentShadingRateKHR = 4422, - SubgroupBallotKHR = 4423, - DrawParameters = 4427, - WorkgroupMemoryExplicitLayoutKHR = 4428, - WorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, - WorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, - SubgroupVoteKHR = 4431, - StorageBuffer16BitAccess = 4433, - StorageUniformBufferBlock16 = 4433, - StorageUniform16 = 4434, - UniformAndStorageBuffer16BitAccess = 4434, - StoragePushConstant16 = 4435, - StorageInputOutput16 = 4436, - DeviceGroup = 4437, - MultiView = 4439, - VariablePointersStorageBuffer = 4441, - VariablePointers = 4442, - AtomicStorageOps = 4445, - SampleMaskPostDepthCoverage = 4447, - StorageBuffer8BitAccess = 4448, - UniformAndStorageBuffer8BitAccess = 4449, - StoragePushConstant8 = 4450, - DenormPreserve = 4464, - DenormFlushToZero = 4465, - SignedZeroInfNanPreserve = 4466, - RoundingModeRTE = 4467, - RoundingModeRTZ = 4468, - RayQueryProvisionalKHR = 4471, - RayQueryKHR = 4472, - RayTraversalPrimitiveCullingKHR = 4478, - RayTracingKHR = 4479, - TextureSampleWeightedQCOM = 4484, - TextureBoxFilterQCOM = 4485, - TextureBlockMatchQCOM = 4486, - Float16ImageAMD = 5008, - ImageGatherBiasLodAMD = 5009, - FragmentMaskAMD = 5010, - StencilExportEXT = 5013, - ImageReadWriteLodAMD = 5015, - Int64ImageEXT = 5016, - ShaderClockKHR = 5055, - SampleMaskOverrideCoverageNV = 5249, - GeometryShaderPassthroughNV = 5251, - ShaderViewportIndexLayerEXT = 5254, - ShaderViewportIndexLayerNV = 5254, - ShaderViewportMaskNV = 5255, - ShaderStereoViewNV = 5259, - PerViewAttributesNV = 5260, - FragmentFullyCoveredEXT = 5265, - MeshShadingNV = 5266, - ImageFootprintNV = 5282, - MeshShadingEXT = 5283, - FragmentBarycentricKHR = 5284, - FragmentBarycentricNV = 5284, - ComputeDerivativeGroupQuadsNV = 5288, - FragmentDensityEXT = 5291, - ShadingRateNV = 5291, - GroupNonUniformPartitionedNV = 5297, - ShaderNonUniform = 5301, - ShaderNonUniformEXT = 5301, - RuntimeDescriptorArray = 5302, - RuntimeDescriptorArrayEXT = 5302, - InputAttachmentArrayDynamicIndexing = 5303, - InputAttachmentArrayDynamicIndexingEXT = 5303, - UniformTexelBufferArrayDynamicIndexing = 5304, - UniformTexelBufferArrayDynamicIndexingEXT = 5304, - StorageTexelBufferArrayDynamicIndexing = 5305, - StorageTexelBufferArrayDynamicIndexingEXT = 5305, - UniformBufferArrayNonUniformIndexing = 5306, - UniformBufferArrayNonUniformIndexingEXT = 5306, - SampledImageArrayNonUniformIndexing = 5307, - SampledImageArrayNonUniformIndexingEXT = 5307, - StorageBufferArrayNonUniformIndexing = 5308, - StorageBufferArrayNonUniformIndexingEXT = 5308, - StorageImageArrayNonUniformIndexing = 5309, - StorageImageArrayNonUniformIndexingEXT = 5309, - InputAttachmentArrayNonUniformIndexing = 5310, - InputAttachmentArrayNonUniformIndexingEXT = 5310, - UniformTexelBufferArrayNonUniformIndexing = 5311, - UniformTexelBufferArrayNonUniformIndexingEXT = 5311, - StorageTexelBufferArrayNonUniformIndexing = 5312, - StorageTexelBufferArrayNonUniformIndexingEXT = 5312, - RayTracingPositionFetchKHR = 5336, - RayTracingNV = 5340, - RayTracingMotionBlurNV = 5341, - VulkanMemoryModel = 5345, - VulkanMemoryModelKHR = 5345, - VulkanMemoryModelDeviceScope = 5346, - VulkanMemoryModelDeviceScopeKHR = 5346, - PhysicalStorageBufferAddresses = 5347, - PhysicalStorageBufferAddressesEXT = 5347, - ComputeDerivativeGroupLinearNV = 5350, - RayTracingProvisionalKHR = 5353, - CooperativeMatrixNV = 5357, - FragmentShaderSampleInterlockEXT = 5363, - FragmentShaderShadingRateInterlockEXT = 5372, - ShaderSMBuiltinsNV = 5373, - FragmentShaderPixelInterlockEXT = 5378, - DemoteToHelperInvocation = 5379, - DemoteToHelperInvocationEXT = 5379, - RayTracingOpacityMicromapEXT = 5381, - ShaderInvocationReorderNV = 5383, - BindlessTextureNV = 5390, - RayQueryPositionFetchKHR = 5391, - SubgroupShuffleINTEL = 5568, - SubgroupBufferBlockIOINTEL = 5569, - SubgroupImageBlockIOINTEL = 5570, - SubgroupImageMediaBlockIOINTEL = 5579, - RoundToInfinityINTEL = 5582, - FloatingPointModeINTEL = 5583, - IntegerFunctions2INTEL = 5584, - FunctionPointersINTEL = 5603, - IndirectReferencesINTEL = 5604, - AsmINTEL = 5606, - AtomicFloat32MinMaxEXT = 5612, - AtomicFloat64MinMaxEXT = 5613, - AtomicFloat16MinMaxEXT = 5616, - VectorComputeINTEL = 5617, - VectorAnyINTEL = 5619, - ExpectAssumeKHR = 5629, - SubgroupAvcMotionEstimationINTEL = 5696, - SubgroupAvcMotionEstimationIntraINTEL = 5697, - SubgroupAvcMotionEstimationChromaINTEL = 5698, - VariableLengthArrayINTEL = 5817, - FunctionFloatControlINTEL = 5821, - FPGAMemoryAttributesINTEL = 5824, - FPFastMathModeINTEL = 5837, - ArbitraryPrecisionIntegersINTEL = 5844, - ArbitraryPrecisionFloatingPointINTEL = 5845, - UnstructuredLoopControlsINTEL = 5886, - FPGALoopControlsINTEL = 5888, - KernelAttributesINTEL = 5892, - FPGAKernelAttributesINTEL = 5897, - FPGAMemoryAccessesINTEL = 5898, - FPGAClusterAttributesINTEL = 5904, - LoopFuseINTEL = 5906, - FPGADSPControlINTEL = 5908, - MemoryAccessAliasingINTEL = 5910, - FPGAInvocationPipeliningAttributesINTEL = 5916, - FPGABufferLocationINTEL = 5920, - ArbitraryPrecisionFixedPointINTEL = 5922, - USMStorageClassesINTEL = 5935, - RuntimeAlignedAttributeINTEL = 5939, - IOPipesINTEL = 5943, - BlockingPipesINTEL = 5945, - FPGARegINTEL = 5948, - DotProductInputAll = 6016, - DotProductInputAllKHR = 6016, - DotProductInput4x8Bit = 6017, - DotProductInput4x8BitKHR = 6017, - DotProductInput4x8BitPacked = 6018, - DotProductInput4x8BitPackedKHR = 6018, - DotProduct = 6019, - DotProductKHR = 6019, - RayCullMaskKHR = 6020, - BitInstructions = 6025, - GroupNonUniformRotateKHR = 6026, - AtomicFloat32AddEXT = 6033, - AtomicFloat64AddEXT = 6034, - LongConstantCompositeINTEL = 6089, - OptNoneINTEL = 6094, - AtomicFloat16AddEXT = 6095, - DebugInfoModuleINTEL = 6114, - BFloat16ConversionINTEL = 6115, - SplitBarrierINTEL = 6141, - FPGAKernelAttributesv2INTEL = 6161, - FPGALatencyControlINTEL = 6171, - FPGAArgumentInterfacesINTEL = 6174, - GroupUniformArithmeticKHR = 6400, - Max = 0x7fffffff, -}; - -enum class RayFlagsShift : unsigned { - OpaqueKHR = 0, - NoOpaqueKHR = 1, - TerminateOnFirstHitKHR = 2, - SkipClosestHitShaderKHR = 3, - CullBackFacingTrianglesKHR = 4, - CullFrontFacingTrianglesKHR = 5, - CullOpaqueKHR = 6, - CullNoOpaqueKHR = 7, - SkipTrianglesKHR = 8, - SkipAABBsKHR = 9, - ForceOpacityMicromap2StateEXT = 10, - Max = 0x7fffffff, -}; - -enum class RayFlagsMask : unsigned { - MaskNone = 0, - OpaqueKHR = 0x00000001, - NoOpaqueKHR = 0x00000002, - TerminateOnFirstHitKHR = 0x00000004, - SkipClosestHitShaderKHR = 0x00000008, - CullBackFacingTrianglesKHR = 0x00000010, - CullFrontFacingTrianglesKHR = 0x00000020, - CullOpaqueKHR = 0x00000040, - CullNoOpaqueKHR = 0x00000080, - SkipTrianglesKHR = 0x00000100, - SkipAABBsKHR = 0x00000200, - ForceOpacityMicromap2StateEXT = 0x00000400, -}; - -enum class RayQueryIntersection : unsigned { - RayQueryCandidateIntersectionKHR = 0, - RayQueryCommittedIntersectionKHR = 1, - Max = 0x7fffffff, -}; - -enum class RayQueryCommittedIntersectionType : unsigned { - RayQueryCommittedIntersectionNoneKHR = 0, - RayQueryCommittedIntersectionTriangleKHR = 1, - RayQueryCommittedIntersectionGeneratedKHR = 2, - Max = 0x7fffffff, -}; - -enum class RayQueryCandidateIntersectionType : unsigned { - RayQueryCandidateIntersectionTriangleKHR = 0, - RayQueryCandidateIntersectionAABBKHR = 1, - Max = 0x7fffffff, -}; - -enum class FragmentShadingRateShift : unsigned { - Vertical2Pixels = 0, - Vertical4Pixels = 1, - Horizontal2Pixels = 2, - Horizontal4Pixels = 3, - Max = 0x7fffffff, -}; - -enum class FragmentShadingRateMask : unsigned { - MaskNone = 0, - Vertical2Pixels = 0x00000001, - Vertical4Pixels = 0x00000002, - Horizontal2Pixels = 0x00000004, - Horizontal4Pixels = 0x00000008, -}; - -enum class FPDenormMode : unsigned { - Preserve = 0, - FlushToZero = 1, - Max = 0x7fffffff, -}; - -enum class FPOperationMode : unsigned { - IEEE = 0, - ALT = 1, - Max = 0x7fffffff, -}; - -enum class QuantizationModes : unsigned { - TRN = 0, - TRN_ZERO = 1, - RND = 2, - RND_ZERO = 3, - RND_INF = 4, - RND_MIN_INF = 5, - RND_CONV = 6, - RND_CONV_ODD = 7, - Max = 0x7fffffff, -}; - -enum class OverflowModes : unsigned { - WRAP = 0, - SAT = 1, - SAT_ZERO = 2, - SAT_SYM = 3, - Max = 0x7fffffff, -}; - -enum class PackedVectorFormat : unsigned { - PackedVectorFormat4x8Bit = 0, - PackedVectorFormat4x8BitKHR = 0, - Max = 0x7fffffff, -}; - -enum class Op : unsigned { - OpNop = 0, - OpUndef = 1, - OpSourceContinued = 2, - OpSource = 3, - OpSourceExtension = 4, - OpName = 5, - OpMemberName = 6, - OpString = 7, - OpLine = 8, - OpExtension = 10, - OpExtInstImport = 11, - OpExtInst = 12, - OpMemoryModel = 14, - OpEntryPoint = 15, - OpExecutionMode = 16, - OpCapability = 17, - OpTypeVoid = 19, - OpTypeBool = 20, - OpTypeInt = 21, - OpTypeFloat = 22, - OpTypeVector = 23, - OpTypeMatrix = 24, - OpTypeImage = 25, - OpTypeSampler = 26, - OpTypeSampledImage = 27, - OpTypeArray = 28, - OpTypeRuntimeArray = 29, - OpTypeStruct = 30, - OpTypeOpaque = 31, - OpTypePointer = 32, - OpTypeFunction = 33, - OpTypeEvent = 34, - OpTypeDeviceEvent = 35, - OpTypeReserveId = 36, - OpTypeQueue = 37, - OpTypePipe = 38, - OpTypeForwardPointer = 39, - OpConstantTrue = 41, - OpConstantFalse = 42, - OpConstant = 43, - OpConstantComposite = 44, - OpConstantSampler = 45, - OpConstantNull = 46, - OpSpecConstantTrue = 48, - OpSpecConstantFalse = 49, - OpSpecConstant = 50, - OpSpecConstantComposite = 51, - OpSpecConstantOp = 52, - OpFunction = 54, - OpFunctionParameter = 55, - OpFunctionEnd = 56, - OpFunctionCall = 57, - OpVariable = 59, - OpImageTexelPointer = 60, - OpLoad = 61, - OpStore = 62, - OpCopyMemory = 63, - OpCopyMemorySized = 64, - OpAccessChain = 65, - OpInBoundsAccessChain = 66, - OpPtrAccessChain = 67, - OpArrayLength = 68, - OpGenericPtrMemSemantics = 69, - OpInBoundsPtrAccessChain = 70, - OpDecorate = 71, - OpMemberDecorate = 72, - OpDecorationGroup = 73, - OpGroupDecorate = 74, - OpGroupMemberDecorate = 75, - OpVectorExtractDynamic = 77, - OpVectorInsertDynamic = 78, - OpVectorShuffle = 79, - OpCompositeConstruct = 80, - OpCompositeExtract = 81, - OpCompositeInsert = 82, - OpCopyObject = 83, - OpTranspose = 84, - OpSampledImage = 86, - OpImageSampleImplicitLod = 87, - OpImageSampleExplicitLod = 88, - OpImageSampleDrefImplicitLod = 89, - OpImageSampleDrefExplicitLod = 90, - OpImageSampleProjImplicitLod = 91, - OpImageSampleProjExplicitLod = 92, - OpImageSampleProjDrefImplicitLod = 93, - OpImageSampleProjDrefExplicitLod = 94, - OpImageFetch = 95, - OpImageGather = 96, - OpImageDrefGather = 97, - OpImageRead = 98, - OpImageWrite = 99, - OpImage = 100, - OpImageQueryFormat = 101, - OpImageQueryOrder = 102, - OpImageQuerySizeLod = 103, - OpImageQuerySize = 104, - OpImageQueryLod = 105, - OpImageQueryLevels = 106, - OpImageQuerySamples = 107, - OpConvertFToU = 109, - OpConvertFToS = 110, - OpConvertSToF = 111, - OpConvertUToF = 112, - OpUConvert = 113, - OpSConvert = 114, - OpFConvert = 115, - OpQuantizeToF16 = 116, - OpConvertPtrToU = 117, - OpSatConvertSToU = 118, - OpSatConvertUToS = 119, - OpConvertUToPtr = 120, - OpPtrCastToGeneric = 121, - OpGenericCastToPtr = 122, - OpGenericCastToPtrExplicit = 123, - OpBitcast = 124, - OpSNegate = 126, - OpFNegate = 127, - OpIAdd = 128, - OpFAdd = 129, - OpISub = 130, - OpFSub = 131, - OpIMul = 132, - OpFMul = 133, - OpUDiv = 134, - OpSDiv = 135, - OpFDiv = 136, - OpUMod = 137, - OpSRem = 138, - OpSMod = 139, - OpFRem = 140, - OpFMod = 141, - OpVectorTimesScalar = 142, - OpMatrixTimesScalar = 143, - OpVectorTimesMatrix = 144, - OpMatrixTimesVector = 145, - OpMatrixTimesMatrix = 146, - OpOuterProduct = 147, - OpDot = 148, - OpIAddCarry = 149, - OpISubBorrow = 150, - OpUMulExtended = 151, - OpSMulExtended = 152, - OpAny = 154, - OpAll = 155, - OpIsNan = 156, - OpIsInf = 157, - OpIsFinite = 158, - OpIsNormal = 159, - OpSignBitSet = 160, - OpLessOrGreater = 161, - OpOrdered = 162, - OpUnordered = 163, - OpLogicalEqual = 164, - OpLogicalNotEqual = 165, - OpLogicalOr = 166, - OpLogicalAnd = 167, - OpLogicalNot = 168, - OpSelect = 169, - OpIEqual = 170, - OpINotEqual = 171, - OpUGreaterThan = 172, - OpSGreaterThan = 173, - OpUGreaterThanEqual = 174, - OpSGreaterThanEqual = 175, - OpULessThan = 176, - OpSLessThan = 177, - OpULessThanEqual = 178, - OpSLessThanEqual = 179, - OpFOrdEqual = 180, - OpFUnordEqual = 181, - OpFOrdNotEqual = 182, - OpFUnordNotEqual = 183, - OpFOrdLessThan = 184, - OpFUnordLessThan = 185, - OpFOrdGreaterThan = 186, - OpFUnordGreaterThan = 187, - OpFOrdLessThanEqual = 188, - OpFUnordLessThanEqual = 189, - OpFOrdGreaterThanEqual = 190, - OpFUnordGreaterThanEqual = 191, - OpShiftRightLogical = 194, - OpShiftRightArithmetic = 195, - OpShiftLeftLogical = 196, - OpBitwiseOr = 197, - OpBitwiseXor = 198, - OpBitwiseAnd = 199, - OpNot = 200, - OpBitFieldInsert = 201, - OpBitFieldSExtract = 202, - OpBitFieldUExtract = 203, - OpBitReverse = 204, - OpBitCount = 205, - OpDPdx = 207, - OpDPdy = 208, - OpFwidth = 209, - OpDPdxFine = 210, - OpDPdyFine = 211, - OpFwidthFine = 212, - OpDPdxCoarse = 213, - OpDPdyCoarse = 214, - OpFwidthCoarse = 215, - OpEmitVertex = 218, - OpEndPrimitive = 219, - OpEmitStreamVertex = 220, - OpEndStreamPrimitive = 221, - OpControlBarrier = 224, - OpMemoryBarrier = 225, - OpAtomicLoad = 227, - OpAtomicStore = 228, - OpAtomicExchange = 229, - OpAtomicCompareExchange = 230, - OpAtomicCompareExchangeWeak = 231, - OpAtomicIIncrement = 232, - OpAtomicIDecrement = 233, - OpAtomicIAdd = 234, - OpAtomicISub = 235, - OpAtomicSMin = 236, - OpAtomicUMin = 237, - OpAtomicSMax = 238, - OpAtomicUMax = 239, - OpAtomicAnd = 240, - OpAtomicOr = 241, - OpAtomicXor = 242, - OpPhi = 245, - OpLoopMerge = 246, - OpSelectionMerge = 247, - OpLabel = 248, - OpBranch = 249, - OpBranchConditional = 250, - OpSwitch = 251, - OpKill = 252, - OpReturn = 253, - OpReturnValue = 254, - OpUnreachable = 255, - OpLifetimeStart = 256, - OpLifetimeStop = 257, - OpGroupAsyncCopy = 259, - OpGroupWaitEvents = 260, - OpGroupAll = 261, - OpGroupAny = 262, - OpGroupBroadcast = 263, - OpGroupIAdd = 264, - OpGroupFAdd = 265, - OpGroupFMin = 266, - OpGroupUMin = 267, - OpGroupSMin = 268, - OpGroupFMax = 269, - OpGroupUMax = 270, - OpGroupSMax = 271, - OpReadPipe = 274, - OpWritePipe = 275, - OpReservedReadPipe = 276, - OpReservedWritePipe = 277, - OpReserveReadPipePackets = 278, - OpReserveWritePipePackets = 279, - OpCommitReadPipe = 280, - OpCommitWritePipe = 281, - OpIsValidReserveId = 282, - OpGetNumPipePackets = 283, - OpGetMaxPipePackets = 284, - OpGroupReserveReadPipePackets = 285, - OpGroupReserveWritePipePackets = 286, - OpGroupCommitReadPipe = 287, - OpGroupCommitWritePipe = 288, - OpEnqueueMarker = 291, - OpEnqueueKernel = 292, - OpGetKernelNDrangeSubGroupCount = 293, - OpGetKernelNDrangeMaxSubGroupSize = 294, - OpGetKernelWorkGroupSize = 295, - OpGetKernelPreferredWorkGroupSizeMultiple = 296, - OpRetainEvent = 297, - OpReleaseEvent = 298, - OpCreateUserEvent = 299, - OpIsValidEvent = 300, - OpSetUserEventStatus = 301, - OpCaptureEventProfilingInfo = 302, - OpGetDefaultQueue = 303, - OpBuildNDRange = 304, - OpImageSparseSampleImplicitLod = 305, - OpImageSparseSampleExplicitLod = 306, - OpImageSparseSampleDrefImplicitLod = 307, - OpImageSparseSampleDrefExplicitLod = 308, - OpImageSparseSampleProjImplicitLod = 309, - OpImageSparseSampleProjExplicitLod = 310, - OpImageSparseSampleProjDrefImplicitLod = 311, - OpImageSparseSampleProjDrefExplicitLod = 312, - OpImageSparseFetch = 313, - OpImageSparseGather = 314, - OpImageSparseDrefGather = 315, - OpImageSparseTexelsResident = 316, - OpNoLine = 317, - OpAtomicFlagTestAndSet = 318, - OpAtomicFlagClear = 319, - OpImageSparseRead = 320, - OpSizeOf = 321, - OpTypePipeStorage = 322, - OpConstantPipeStorage = 323, - OpCreatePipeFromPipeStorage = 324, - OpGetKernelLocalSizeForSubgroupCount = 325, - OpGetKernelMaxNumSubgroups = 326, - OpTypeNamedBarrier = 327, - OpNamedBarrierInitialize = 328, - OpMemoryNamedBarrier = 329, - OpModuleProcessed = 330, - OpExecutionModeId = 331, - OpDecorateId = 332, - OpGroupNonUniformElect = 333, - OpGroupNonUniformAll = 334, - OpGroupNonUniformAny = 335, - OpGroupNonUniformAllEqual = 336, - OpGroupNonUniformBroadcast = 337, - OpGroupNonUniformBroadcastFirst = 338, - OpGroupNonUniformBallot = 339, - OpGroupNonUniformInverseBallot = 340, - OpGroupNonUniformBallotBitExtract = 341, - OpGroupNonUniformBallotBitCount = 342, - OpGroupNonUniformBallotFindLSB = 343, - OpGroupNonUniformBallotFindMSB = 344, - OpGroupNonUniformShuffle = 345, - OpGroupNonUniformShuffleXor = 346, - OpGroupNonUniformShuffleUp = 347, - OpGroupNonUniformShuffleDown = 348, - OpGroupNonUniformIAdd = 349, - OpGroupNonUniformFAdd = 350, - OpGroupNonUniformIMul = 351, - OpGroupNonUniformFMul = 352, - OpGroupNonUniformSMin = 353, - OpGroupNonUniformUMin = 354, - OpGroupNonUniformFMin = 355, - OpGroupNonUniformSMax = 356, - OpGroupNonUniformUMax = 357, - OpGroupNonUniformFMax = 358, - OpGroupNonUniformBitwiseAnd = 359, - OpGroupNonUniformBitwiseOr = 360, - OpGroupNonUniformBitwiseXor = 361, - OpGroupNonUniformLogicalAnd = 362, - OpGroupNonUniformLogicalOr = 363, - OpGroupNonUniformLogicalXor = 364, - OpGroupNonUniformQuadBroadcast = 365, - OpGroupNonUniformQuadSwap = 366, - OpCopyLogical = 400, - OpPtrEqual = 401, - OpPtrNotEqual = 402, - OpPtrDiff = 403, - OpColorAttachmentReadEXT = 4160, - OpDepthAttachmentReadEXT = 4161, - OpStencilAttachmentReadEXT = 4162, - OpTerminateInvocation = 4416, - OpSubgroupBallotKHR = 4421, - OpSubgroupFirstInvocationKHR = 4422, - OpSubgroupAllKHR = 4428, - OpSubgroupAnyKHR = 4429, - OpSubgroupAllEqualKHR = 4430, - OpGroupNonUniformRotateKHR = 4431, - OpSubgroupReadInvocationKHR = 4432, - OpTraceRayKHR = 4445, - OpExecuteCallableKHR = 4446, - OpConvertUToAccelerationStructureKHR = 4447, - OpIgnoreIntersectionKHR = 4448, - OpTerminateRayKHR = 4449, - OpSDot = 4450, - OpSDotKHR = 4450, - OpUDot = 4451, - OpUDotKHR = 4451, - OpSUDot = 4452, - OpSUDotKHR = 4452, - OpSDotAccSat = 4453, - OpSDotAccSatKHR = 4453, - OpUDotAccSat = 4454, - OpUDotAccSatKHR = 4454, - OpSUDotAccSat = 4455, - OpSUDotAccSatKHR = 4455, - OpTypeRayQueryKHR = 4472, - OpRayQueryInitializeKHR = 4473, - OpRayQueryTerminateKHR = 4474, - OpRayQueryGenerateIntersectionKHR = 4475, - OpRayQueryConfirmIntersectionKHR = 4476, - OpRayQueryProceedKHR = 4477, - OpRayQueryGetIntersectionTypeKHR = 4479, - OpImageSampleWeightedQCOM = 4480, - OpImageBoxFilterQCOM = 4481, - OpImageBlockMatchSSDQCOM = 4482, - OpImageBlockMatchSADQCOM = 4483, - OpGroupIAddNonUniformAMD = 5000, - OpGroupFAddNonUniformAMD = 5001, - OpGroupFMinNonUniformAMD = 5002, - OpGroupUMinNonUniformAMD = 5003, - OpGroupSMinNonUniformAMD = 5004, - OpGroupFMaxNonUniformAMD = 5005, - OpGroupUMaxNonUniformAMD = 5006, - OpGroupSMaxNonUniformAMD = 5007, - OpFragmentMaskFetchAMD = 5011, - OpFragmentFetchAMD = 5012, - OpReadClockKHR = 5056, - OpHitObjectRecordHitMotionNV = 5249, - OpHitObjectRecordHitWithIndexMotionNV = 5250, - OpHitObjectRecordMissMotionNV = 5251, - OpHitObjectGetWorldToObjectNV = 5252, - OpHitObjectGetObjectToWorldNV = 5253, - OpHitObjectGetObjectRayDirectionNV = 5254, - OpHitObjectGetObjectRayOriginNV = 5255, - OpHitObjectTraceRayMotionNV = 5256, - OpHitObjectGetShaderRecordBufferHandleNV = 5257, - OpHitObjectGetShaderBindingTableRecordIndexNV = 5258, - OpHitObjectRecordEmptyNV = 5259, - OpHitObjectTraceRayNV = 5260, - OpHitObjectRecordHitNV = 5261, - OpHitObjectRecordHitWithIndexNV = 5262, - OpHitObjectRecordMissNV = 5263, - OpHitObjectExecuteShaderNV = 5264, - OpHitObjectGetCurrentTimeNV = 5265, - OpHitObjectGetAttributesNV = 5266, - OpHitObjectGetHitKindNV = 5267, - OpHitObjectGetPrimitiveIndexNV = 5268, - OpHitObjectGetGeometryIndexNV = 5269, - OpHitObjectGetInstanceIdNV = 5270, - OpHitObjectGetInstanceCustomIndexNV = 5271, - OpHitObjectGetWorldRayDirectionNV = 5272, - OpHitObjectGetWorldRayOriginNV = 5273, - OpHitObjectGetRayTMaxNV = 5274, - OpHitObjectGetRayTMinNV = 5275, - OpHitObjectIsEmptyNV = 5276, - OpHitObjectIsHitNV = 5277, - OpHitObjectIsMissNV = 5278, - OpReorderThreadWithHitObjectNV = 5279, - OpReorderThreadWithHintNV = 5280, - OpTypeHitObjectNV = 5281, - OpImageSampleFootprintNV = 5283, - OpEmitMeshTasksEXT = 5294, - OpSetMeshOutputsEXT = 5295, - OpGroupNonUniformPartitionNV = 5296, - OpWritePackedPrimitiveIndices4x8NV = 5299, - OpReportIntersectionKHR = 5334, - OpReportIntersectionNV = 5334, - OpIgnoreIntersectionNV = 5335, - OpTerminateRayNV = 5336, - OpTraceNV = 5337, - OpTraceMotionNV = 5338, - OpTraceRayMotionNV = 5339, - OpRayQueryGetIntersectionTriangleVertexPositionsKHR = 5340, - OpTypeAccelerationStructureKHR = 5341, - OpTypeAccelerationStructureNV = 5341, - OpExecuteCallableNV = 5344, - OpTypeCooperativeMatrixNV = 5358, - OpCooperativeMatrixLoadNV = 5359, - OpCooperativeMatrixStoreNV = 5360, - OpCooperativeMatrixMulAddNV = 5361, - OpCooperativeMatrixLengthNV = 5362, - OpBeginInvocationInterlockEXT = 5364, - OpEndInvocationInterlockEXT = 5365, - OpDemoteToHelperInvocation = 5380, - OpDemoteToHelperInvocationEXT = 5380, - OpIsHelperInvocationEXT = 5381, - OpConvertUToImageNV = 5391, - OpConvertUToSamplerNV = 5392, - OpConvertImageToUNV = 5393, - OpConvertSamplerToUNV = 5394, - OpConvertUToSampledImageNV = 5395, - OpConvertSampledImageToUNV = 5396, - OpSamplerImageAddressingModeNV = 5397, - OpSubgroupShuffleINTEL = 5571, - OpSubgroupShuffleDownINTEL = 5572, - OpSubgroupShuffleUpINTEL = 5573, - OpSubgroupShuffleXorINTEL = 5574, - OpSubgroupBlockReadINTEL = 5575, - OpSubgroupBlockWriteINTEL = 5576, - OpSubgroupImageBlockReadINTEL = 5577, - OpSubgroupImageBlockWriteINTEL = 5578, - OpSubgroupImageMediaBlockReadINTEL = 5580, - OpSubgroupImageMediaBlockWriteINTEL = 5581, - OpUCountLeadingZerosINTEL = 5585, - OpUCountTrailingZerosINTEL = 5586, - OpAbsISubINTEL = 5587, - OpAbsUSubINTEL = 5588, - OpIAddSatINTEL = 5589, - OpUAddSatINTEL = 5590, - OpIAverageINTEL = 5591, - OpUAverageINTEL = 5592, - OpIAverageRoundedINTEL = 5593, - OpUAverageRoundedINTEL = 5594, - OpISubSatINTEL = 5595, - OpUSubSatINTEL = 5596, - OpIMul32x16INTEL = 5597, - OpUMul32x16INTEL = 5598, - OpConstantFunctionPointerINTEL = 5600, - OpFunctionPointerCallINTEL = 5601, - OpAsmTargetINTEL = 5609, - OpAsmINTEL = 5610, - OpAsmCallINTEL = 5611, - OpAtomicFMinEXT = 5614, - OpAtomicFMaxEXT = 5615, - OpAssumeTrueKHR = 5630, - OpExpectKHR = 5631, - OpDecorateString = 5632, - OpDecorateStringGOOGLE = 5632, - OpMemberDecorateString = 5633, - OpMemberDecorateStringGOOGLE = 5633, - OpVmeImageINTEL = 5699, - OpTypeVmeImageINTEL = 5700, - OpTypeAvcImePayloadINTEL = 5701, - OpTypeAvcRefPayloadINTEL = 5702, - OpTypeAvcSicPayloadINTEL = 5703, - OpTypeAvcMcePayloadINTEL = 5704, - OpTypeAvcMceResultINTEL = 5705, - OpTypeAvcImeResultINTEL = 5706, - OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, - OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, - OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, - OpTypeAvcImeDualReferenceStreaminINTEL = 5710, - OpTypeAvcRefResultINTEL = 5711, - OpTypeAvcSicResultINTEL = 5712, - OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, - OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, - OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, - OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, - OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, - OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, - OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, - OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, - OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, - OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, - OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, - OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, - OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, - OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, - OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, - OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, - OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, - OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, - OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, - OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, - OpSubgroupAvcMceConvertToImeResultINTEL = 5733, - OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, - OpSubgroupAvcMceConvertToRefResultINTEL = 5735, - OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, - OpSubgroupAvcMceConvertToSicResultINTEL = 5737, - OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, - OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, - OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, - OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, - OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, - OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, - OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, - OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, - OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, - OpSubgroupAvcImeInitializeINTEL = 5747, - OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, - OpSubgroupAvcImeSetDualReferenceINTEL = 5749, - OpSubgroupAvcImeRefWindowSizeINTEL = 5750, - OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, - OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, - OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, - OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, - OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, - OpSubgroupAvcImeSetWeightedSadINTEL = 5756, - OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, - OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, - OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, - OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, - OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, - OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, - OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, - OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, - OpSubgroupAvcImeConvertToMceResultINTEL = 5765, - OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, - OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, - OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, - OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = - 5770, - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, - OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, - OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, - OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, - OpSubgroupAvcImeGetBorderReachedINTEL = 5776, - OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, - OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, - OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, - OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, - OpSubgroupAvcFmeInitializeINTEL = 5781, - OpSubgroupAvcBmeInitializeINTEL = 5782, - OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, - OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, - OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, - OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, - OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, - OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, - OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, - OpSubgroupAvcRefConvertToMceResultINTEL = 5790, - OpSubgroupAvcSicInitializeINTEL = 5791, - OpSubgroupAvcSicConfigureSkcINTEL = 5792, - OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, - OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, - OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, - OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, - OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, - OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, - OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, - OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, - OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, - OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, - OpSubgroupAvcSicEvaluateIpeINTEL = 5803, - OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, - OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, - OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, - OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, - OpSubgroupAvcSicConvertToMceResultINTEL = 5808, - OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, - OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, - OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, - OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, - OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, - OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, - OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, - OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, - OpVariableLengthArrayINTEL = 5818, - OpSaveMemoryINTEL = 5819, - OpRestoreMemoryINTEL = 5820, - OpArbitraryFloatSinCosPiINTEL = 5840, - OpArbitraryFloatCastINTEL = 5841, - OpArbitraryFloatCastFromIntINTEL = 5842, - OpArbitraryFloatCastToIntINTEL = 5843, - OpArbitraryFloatAddINTEL = 5846, - OpArbitraryFloatSubINTEL = 5847, - OpArbitraryFloatMulINTEL = 5848, - OpArbitraryFloatDivINTEL = 5849, - OpArbitraryFloatGTINTEL = 5850, - OpArbitraryFloatGEINTEL = 5851, - OpArbitraryFloatLTINTEL = 5852, - OpArbitraryFloatLEINTEL = 5853, - OpArbitraryFloatEQINTEL = 5854, - OpArbitraryFloatRecipINTEL = 5855, - OpArbitraryFloatRSqrtINTEL = 5856, - OpArbitraryFloatCbrtINTEL = 5857, - OpArbitraryFloatHypotINTEL = 5858, - OpArbitraryFloatSqrtINTEL = 5859, - OpArbitraryFloatLogINTEL = 5860, - OpArbitraryFloatLog2INTEL = 5861, - OpArbitraryFloatLog10INTEL = 5862, - OpArbitraryFloatLog1pINTEL = 5863, - OpArbitraryFloatExpINTEL = 5864, - OpArbitraryFloatExp2INTEL = 5865, - OpArbitraryFloatExp10INTEL = 5866, - OpArbitraryFloatExpm1INTEL = 5867, - OpArbitraryFloatSinINTEL = 5868, - OpArbitraryFloatCosINTEL = 5869, - OpArbitraryFloatSinCosINTEL = 5870, - OpArbitraryFloatSinPiINTEL = 5871, - OpArbitraryFloatCosPiINTEL = 5872, - OpArbitraryFloatASinINTEL = 5873, - OpArbitraryFloatASinPiINTEL = 5874, - OpArbitraryFloatACosINTEL = 5875, - OpArbitraryFloatACosPiINTEL = 5876, - OpArbitraryFloatATanINTEL = 5877, - OpArbitraryFloatATanPiINTEL = 5878, - OpArbitraryFloatATan2INTEL = 5879, - OpArbitraryFloatPowINTEL = 5880, - OpArbitraryFloatPowRINTEL = 5881, - OpArbitraryFloatPowNINTEL = 5882, - OpLoopControlINTEL = 5887, - OpAliasDomainDeclINTEL = 5911, - OpAliasScopeDeclINTEL = 5912, - OpAliasScopeListDeclINTEL = 5913, - OpFixedSqrtINTEL = 5923, - OpFixedRecipINTEL = 5924, - OpFixedRsqrtINTEL = 5925, - OpFixedSinINTEL = 5926, - OpFixedCosINTEL = 5927, - OpFixedSinCosINTEL = 5928, - OpFixedSinPiINTEL = 5929, - OpFixedCosPiINTEL = 5930, - OpFixedSinCosPiINTEL = 5931, - OpFixedLogINTEL = 5932, - OpFixedExpINTEL = 5933, - OpPtrCastToCrossWorkgroupINTEL = 5934, - OpCrossWorkgroupCastToPtrINTEL = 5938, - OpReadPipeBlockingINTEL = 5946, - OpWritePipeBlockingINTEL = 5947, - OpFPGARegINTEL = 5949, - OpRayQueryGetRayTMinKHR = 6016, - OpRayQueryGetRayFlagsKHR = 6017, - OpRayQueryGetIntersectionTKHR = 6018, - OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, - OpRayQueryGetIntersectionInstanceIdKHR = 6020, - OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, - OpRayQueryGetIntersectionGeometryIndexKHR = 6022, - OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, - OpRayQueryGetIntersectionBarycentricsKHR = 6024, - OpRayQueryGetIntersectionFrontFaceKHR = 6025, - OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, - OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, - OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, - OpRayQueryGetWorldRayDirectionKHR = 6029, - OpRayQueryGetWorldRayOriginKHR = 6030, - OpRayQueryGetIntersectionObjectToWorldKHR = 6031, - OpRayQueryGetIntersectionWorldToObjectKHR = 6032, - OpAtomicFAddEXT = 6035, - OpTypeBufferSurfaceINTEL = 6086, - OpTypeStructContinuedINTEL = 6090, - OpConstantCompositeContinuedINTEL = 6091, - OpSpecConstantCompositeContinuedINTEL = 6092, - OpConvertFToBF16INTEL = 6116, - OpConvertBF16ToFINTEL = 6117, - OpControlBarrierArriveINTEL = 6142, - OpControlBarrierWaitINTEL = 6143, - OpGroupIMulKHR = 6401, - OpGroupFMulKHR = 6402, - OpGroupBitwiseAndKHR = 6403, - OpGroupBitwiseOrKHR = 6404, - OpGroupBitwiseXorKHR = 6405, - OpGroupLogicalAndKHR = 6406, - OpGroupLogicalOrKHR = 6407, - OpGroupLogicalXorKHR = 6408, - Max = 0x7fffffff, -}; - -#ifdef SPV_ENABLE_UTILITY_CODE -#ifndef __cplusplus -#include -#endif -inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { - *hasResult = *hasResultType = false; - switch (opcode) { - default: /* unknown opcode */ - break; - case Op::OpNop: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpUndef: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSourceContinued: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSource: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSourceExtension: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpName: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpMemberName: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpString: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpLine: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExtension: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExtInstImport: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpExtInst: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpMemoryModel: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEntryPoint: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExecutionMode: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCapability: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTypeVoid: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeBool: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeInt: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeFloat: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeVector: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeMatrix: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeImage: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeSampler: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeSampledImage: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeArray: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeRuntimeArray: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeStruct: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeOpaque: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypePointer: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeFunction: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeEvent: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeDeviceEvent: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeReserveId: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeQueue: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypePipe: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeForwardPointer: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpConstantTrue: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstantFalse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstant: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstantComposite: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstantSampler: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstantNull: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSpecConstantTrue: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSpecConstantFalse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSpecConstant: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSpecConstantComposite: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSpecConstantOp: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFunction: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFunctionParameter: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFunctionEnd: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpFunctionCall: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVariable: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageTexelPointer: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLoad: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpStore: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCopyMemory: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCopyMemorySized: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpAccessChain: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpInBoundsAccessChain: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrAccessChain: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArrayLength: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGenericPtrMemSemantics: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpInBoundsPtrAccessChain: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDecorate: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpMemberDecorate: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpDecorationGroup: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpGroupDecorate: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupMemberDecorate: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpVectorExtractDynamic: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVectorInsertDynamic: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVectorShuffle: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCompositeConstruct: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCompositeExtract: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCompositeInsert: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCopyObject: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTranspose: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSampledImage: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleDrefImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleDrefExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleProjImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleProjExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleProjDrefImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleProjDrefExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageFetch: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageGather: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageDrefGather: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageRead: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageWrite: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpImage: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQueryFormat: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQueryOrder: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQuerySizeLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQuerySize: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQueryLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQueryLevels: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageQuerySamples: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertFToU: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertFToS: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertSToF: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertUToF: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUConvert: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSConvert: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFConvert: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpQuantizeToF16: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertPtrToU: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSatConvertSToU: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSatConvertUToS: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertUToPtr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrCastToGeneric: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGenericCastToPtr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGenericCastToPtrExplicit: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitcast: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSNegate: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFNegate: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpISub: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFSub: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIMul: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFMul: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUDiv: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSDiv: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFDiv: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUMod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSRem: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSMod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFRem: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFMod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVectorTimesScalar: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpMatrixTimesScalar: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVectorTimesMatrix: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpMatrixTimesVector: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpMatrixTimesMatrix: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpOuterProduct: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIAddCarry: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpISubBorrow: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUMulExtended: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSMulExtended: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAny: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAll: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIsNan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIsInf: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIsFinite: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIsNormal: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSignBitSet: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLessOrGreater: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpOrdered: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUnordered: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLogicalEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLogicalNotEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLogicalOr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLogicalAnd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLogicalNot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSelect: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpINotEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUGreaterThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSGreaterThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUGreaterThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSGreaterThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpULessThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSLessThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpULessThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSLessThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdNotEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordNotEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdLessThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordLessThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdGreaterThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordGreaterThan: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdLessThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordLessThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFOrdGreaterThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFUnordGreaterThanEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpShiftRightLogical: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpShiftRightArithmetic: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpShiftLeftLogical: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitwiseOr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitwiseXor: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitwiseAnd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpNot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitFieldInsert: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitFieldSExtract: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitFieldUExtract: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitReverse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBitCount: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdx: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdy: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFwidth: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdxFine: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdyFine: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFwidthFine: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdxCoarse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDPdyCoarse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFwidthCoarse: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpEmitVertex: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEndPrimitive: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEmitStreamVertex: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEndStreamPrimitive: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpControlBarrier: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpMemoryBarrier: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpAtomicLoad: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicStore: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpAtomicExchange: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicCompareExchange: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicCompareExchangeWeak: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicIIncrement: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicIDecrement: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicIAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicISub: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicSMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicUMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicSMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicUMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicAnd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicOr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicXor: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPhi: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLoopMerge: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSelectionMerge: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpLabel: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpBranch: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpBranchConditional: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSwitch: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpKill: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpReturn: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpReturnValue: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpUnreachable: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpLifetimeStart: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpLifetimeStop: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupAsyncCopy: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupWaitEvents: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupAll: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupAny: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupBroadcast: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupIAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupUMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupSMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupUMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupSMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReadPipe: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpWritePipe: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReservedReadPipe: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReservedWritePipe: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReserveReadPipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReserveWritePipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCommitReadPipe: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCommitWritePipe: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpIsValidReserveId: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetNumPipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetMaxPipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupReserveReadPipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupReserveWritePipePackets: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupCommitReadPipe: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupCommitWritePipe: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEnqueueMarker: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpEnqueueKernel: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelNDrangeSubGroupCount: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelNDrangeMaxSubGroupSize: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelWorkGroupSize: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelPreferredWorkGroupSizeMultiple: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRetainEvent: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpReleaseEvent: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCreateUserEvent: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIsValidEvent: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSetUserEventStatus: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCaptureEventProfilingInfo: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGetDefaultQueue: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBuildNDRange: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleDrefImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleDrefExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleProjImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleProjExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleProjDrefImplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseSampleProjDrefExplicitLod: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseFetch: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseGather: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseDrefGather: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSparseTexelsResident: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpNoLine: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpAtomicFlagTestAndSet: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicFlagClear: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpImageSparseRead: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSizeOf: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypePipeStorage: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpConstantPipeStorage: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCreatePipeFromPipeStorage: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelLocalSizeForSubgroupCount: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGetKernelMaxNumSubgroups: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypeNamedBarrier: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpNamedBarrierInitialize: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpMemoryNamedBarrier: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpModuleProcessed: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExecutionModeId: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpDecorateId: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupNonUniformElect: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformAll: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformAny: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformAllEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBroadcast: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBroadcastFirst: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBallot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformInverseBallot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBallotBitExtract: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBallotBitCount: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBallotFindLSB: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBallotFindMSB: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformShuffle: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformShuffleXor: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformShuffleUp: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformShuffleDown: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformIAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformFAdd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformIMul: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformFMul: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformSMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformUMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformFMin: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformSMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformUMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformFMax: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBitwiseAnd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBitwiseOr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformBitwiseXor: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformLogicalAnd: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformLogicalOr: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformLogicalXor: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformQuadBroadcast: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformQuadSwap: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCopyLogical: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrNotEqual: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrDiff: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpColorAttachmentReadEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDepthAttachmentReadEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpStencilAttachmentReadEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTerminateInvocation: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSubgroupBallotKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupFirstInvocationKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAllKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAnyKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAllEqualKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupNonUniformRotateKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupReadInvocationKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTraceRayKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExecuteCallableKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpConvertUToAccelerationStructureKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIgnoreIntersectionKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTerminateRayKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSDot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUDot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSUDot: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSDotAccSat: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUDotAccSat: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSUDotAccSat: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypeRayQueryKHR: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpRayQueryInitializeKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpRayQueryTerminateKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpRayQueryGenerateIntersectionKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpRayQueryConfirmIntersectionKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpRayQueryProceedKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionTypeKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageSampleWeightedQCOM: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageBoxFilterQCOM: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageBlockMatchSSDQCOM: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpImageBlockMatchSADQCOM: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupIAddNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFAddNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFMinNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupUMinNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupSMinNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFMaxNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupUMaxNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupSMaxNonUniformAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFragmentMaskFetchAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFragmentFetchAMD: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReadClockKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectRecordHitMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectRecordHitWithIndexMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectRecordMissMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectGetWorldToObjectNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetObjectToWorldNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetObjectRayDirectionNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetObjectRayOriginNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectTraceRayMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectGetShaderRecordBufferHandleNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetShaderBindingTableRecordIndexNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectRecordEmptyNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectTraceRayNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectRecordHitNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectRecordHitWithIndexNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectRecordMissNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectExecuteShaderNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectGetCurrentTimeNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetAttributesNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpHitObjectGetHitKindNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetPrimitiveIndexNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetGeometryIndexNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetInstanceIdNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetInstanceCustomIndexNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetWorldRayDirectionNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetWorldRayOriginNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetRayTMaxNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectGetRayTMinNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectIsEmptyNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectIsHitNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpHitObjectIsMissNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReorderThreadWithHitObjectNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpReorderThreadWithHintNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTypeHitObjectNV: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpImageSampleFootprintNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpEmitMeshTasksEXT: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSetMeshOutputsEXT: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupNonUniformPartitionNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpWritePackedPrimitiveIndices4x8NV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpReportIntersectionNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIgnoreIntersectionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTerminateRayNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTraceNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTraceMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTraceRayMotionNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpRayQueryGetIntersectionTriangleVertexPositionsKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypeAccelerationStructureNV: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpExecuteCallableNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpTypeCooperativeMatrixNV: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpCooperativeMatrixLoadNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCooperativeMatrixStoreNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpCooperativeMatrixMulAddNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCooperativeMatrixLengthNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpBeginInvocationInterlockEXT: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpEndInvocationInterlockEXT: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpDemoteToHelperInvocation: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpIsHelperInvocationEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertUToImageNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertUToSamplerNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertImageToUNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertSamplerToUNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertUToSampledImageNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertSampledImageToUNV: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSamplerImageAddressingModeNV: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSubgroupShuffleINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupShuffleDownINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupShuffleUpINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupShuffleXorINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupBlockReadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupBlockWriteINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSubgroupImageBlockReadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupImageBlockWriteINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSubgroupImageMediaBlockReadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupImageMediaBlockWriteINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpUCountLeadingZerosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUCountTrailingZerosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAbsISubINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAbsUSubINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIAddSatINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUAddSatINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIAverageINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUAverageINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIAverageRoundedINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUAverageRoundedINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpISubSatINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUSubSatINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpIMul32x16INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpUMul32x16INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConstantFunctionPointerINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFunctionPointerCallINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAsmTargetINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAsmINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAsmCallINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicFMinEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicFMaxEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAssumeTrueKHR: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpExpectKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpDecorateString: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpMemberDecorateString: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpVmeImageINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypeVmeImageINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImePayloadINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcRefPayloadINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcSicPayloadINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcMcePayloadINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcMceResultINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImeResultINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImeResultSingleReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImeResultDualReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImeSingleReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcImeDualReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcRefResultINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeAvcSicResultINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetInterShapePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetAcOnlyHaarINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToImePayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToImeResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToRefPayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToRefResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToSicPayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceConvertToSicResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetMotionVectorsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterDistortionsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetBestInterDistortionsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterMajorShapeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterMinorShapeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterDirectionsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterMotionVectorCountINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterReferenceIdsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeInitializeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetSingleReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetDualReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeRefWindowSizeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeAdjustRefOffsetINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeConvertToMcePayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeSetWeightedSadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeConvertToMceResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetDualReferenceStreaminINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op:: - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op:: - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op:: - OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op:: - OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetBorderReachedINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcFmeInitializeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcBmeInitializeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefConvertToMcePayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefSetBilinearFilterEnableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcRefConvertToMceResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicInitializeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicConfigureSkcINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicConfigureIpeLumaINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetMotionVectorMaskINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicConvertToMcePayloadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetBilinearFilterEnableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicEvaluateIpeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicConvertToMceResultINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetIpeLumaShapeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetIpeChromaModeINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSubgroupAvcSicGetInterRawSadsINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpVariableLengthArrayINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpSaveMemoryINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRestoreMemoryINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpArbitraryFloatSinCosPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCastINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCastFromIntINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCastToIntINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatAddINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatSubINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatMulINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatDivINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatGTINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatGEINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLTINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLEINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatEQINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatRecipINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatRSqrtINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCbrtINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatHypotINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatSqrtINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLogINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLog2INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLog10INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatLog1pINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatExpINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatExp2INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatExp10INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatExpm1INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatSinINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatSinCosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatSinPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatCosPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatASinINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatASinPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatACosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatACosPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatATanINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatATanPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatATan2INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatPowINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatPowRINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpArbitraryFloatPowNINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpLoopControlINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpAliasDomainDeclINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpAliasScopeDeclINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpAliasScopeListDeclINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpFixedSqrtINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedRecipINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedRsqrtINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedSinINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedCosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedSinCosINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedSinPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedCosPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedSinCosPiINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedLogINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFixedExpINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpPtrCastToCrossWorkgroupINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpCrossWorkgroupCastToPtrINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpReadPipeBlockingINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpWritePipeBlockingINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpFPGARegINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetRayTMinKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetRayFlagsKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionTKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionInstanceCustomIndexKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionInstanceIdKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionGeometryIndexKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionPrimitiveIndexKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionBarycentricsKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionFrontFaceKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionObjectRayDirectionKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionObjectRayOriginKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetWorldRayDirectionKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetWorldRayOriginKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionObjectToWorldKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpRayQueryGetIntersectionWorldToObjectKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpAtomicFAddEXT: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpTypeBufferSurfaceINTEL: - *hasResult = true; - *hasResultType = false; - break; - case Op::OpTypeStructContinuedINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpConstantCompositeContinuedINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpSpecConstantCompositeContinuedINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpConvertFToBF16INTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpConvertBF16ToFINTEL: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpControlBarrierArriveINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpControlBarrierWaitINTEL: - *hasResult = false; - *hasResultType = false; - break; - case Op::OpGroupIMulKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupFMulKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupBitwiseAndKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupBitwiseOrKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupBitwiseXorKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupLogicalAndKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupLogicalOrKHR: - *hasResult = true; - *hasResultType = true; - break; - case Op::OpGroupLogicalXorKHR: - *hasResult = true; - *hasResultType = true; - break; - } -} -#endif /* SPV_ENABLE_UTILITY_CODE */ - -// Overload bitwise operators for mask bit combining - -constexpr ImageOperandsMask operator|(ImageOperandsMask a, - ImageOperandsMask b) { - return ImageOperandsMask(unsigned(a) | unsigned(b)); -} -constexpr ImageOperandsMask operator&(ImageOperandsMask a, - ImageOperandsMask b) { - return ImageOperandsMask(unsigned(a) & unsigned(b)); -} -constexpr ImageOperandsMask operator^(ImageOperandsMask a, - ImageOperandsMask b) { - return ImageOperandsMask(unsigned(a) ^ unsigned(b)); -} -constexpr ImageOperandsMask operator~(ImageOperandsMask a) { - return ImageOperandsMask(~unsigned(a)); -} -constexpr FPFastMathModeMask operator|(FPFastMathModeMask a, - FPFastMathModeMask b) { - return FPFastMathModeMask(unsigned(a) | unsigned(b)); -} -constexpr FPFastMathModeMask operator&(FPFastMathModeMask a, - FPFastMathModeMask b) { - return FPFastMathModeMask(unsigned(a) & unsigned(b)); -} -constexpr FPFastMathModeMask operator^(FPFastMathModeMask a, - FPFastMathModeMask b) { - return FPFastMathModeMask(unsigned(a) ^ unsigned(b)); -} -constexpr FPFastMathModeMask operator~(FPFastMathModeMask a) { - return FPFastMathModeMask(~unsigned(a)); -} -constexpr SelectionControlMask operator|(SelectionControlMask a, - SelectionControlMask b) { - return SelectionControlMask(unsigned(a) | unsigned(b)); -} -constexpr SelectionControlMask operator&(SelectionControlMask a, - SelectionControlMask b) { - return SelectionControlMask(unsigned(a) & unsigned(b)); -} -constexpr SelectionControlMask operator^(SelectionControlMask a, - SelectionControlMask b) { - return SelectionControlMask(unsigned(a) ^ unsigned(b)); -} -constexpr SelectionControlMask operator~(SelectionControlMask a) { - return SelectionControlMask(~unsigned(a)); -} -constexpr LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { - return LoopControlMask(unsigned(a) | unsigned(b)); -} -constexpr LoopControlMask operator&(LoopControlMask a, LoopControlMask b) { - return LoopControlMask(unsigned(a) & unsigned(b)); -} -constexpr LoopControlMask operator^(LoopControlMask a, LoopControlMask b) { - return LoopControlMask(unsigned(a) ^ unsigned(b)); -} -constexpr LoopControlMask operator~(LoopControlMask a) { - return LoopControlMask(~unsigned(a)); -} -constexpr FunctionControlMask operator|(FunctionControlMask a, - FunctionControlMask b) { - return FunctionControlMask(unsigned(a) | unsigned(b)); -} -constexpr FunctionControlMask operator&(FunctionControlMask a, - FunctionControlMask b) { - return FunctionControlMask(unsigned(a) & unsigned(b)); -} -constexpr FunctionControlMask operator^(FunctionControlMask a, - FunctionControlMask b) { - return FunctionControlMask(unsigned(a) ^ unsigned(b)); -} -constexpr FunctionControlMask operator~(FunctionControlMask a) { - return FunctionControlMask(~unsigned(a)); -} -constexpr MemorySemanticsMask operator|(MemorySemanticsMask a, - MemorySemanticsMask b) { - return MemorySemanticsMask(unsigned(a) | unsigned(b)); -} -constexpr MemorySemanticsMask operator&(MemorySemanticsMask a, - MemorySemanticsMask b) { - return MemorySemanticsMask(unsigned(a) & unsigned(b)); -} -constexpr MemorySemanticsMask operator^(MemorySemanticsMask a, - MemorySemanticsMask b) { - return MemorySemanticsMask(unsigned(a) ^ unsigned(b)); -} -constexpr MemorySemanticsMask operator~(MemorySemanticsMask a) { - return MemorySemanticsMask(~unsigned(a)); -} -constexpr MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { - return MemoryAccessMask(unsigned(a) | unsigned(b)); -} -constexpr MemoryAccessMask operator&(MemoryAccessMask a, MemoryAccessMask b) { - return MemoryAccessMask(unsigned(a) & unsigned(b)); -} -constexpr MemoryAccessMask operator^(MemoryAccessMask a, MemoryAccessMask b) { - return MemoryAccessMask(unsigned(a) ^ unsigned(b)); -} -constexpr MemoryAccessMask operator~(MemoryAccessMask a) { - return MemoryAccessMask(~unsigned(a)); -} -constexpr KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, - KernelProfilingInfoMask b) { - return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); -} -constexpr KernelProfilingInfoMask operator&(KernelProfilingInfoMask a, - KernelProfilingInfoMask b) { - return KernelProfilingInfoMask(unsigned(a) & unsigned(b)); -} -constexpr KernelProfilingInfoMask operator^(KernelProfilingInfoMask a, - KernelProfilingInfoMask b) { - return KernelProfilingInfoMask(unsigned(a) ^ unsigned(b)); -} -constexpr KernelProfilingInfoMask operator~(KernelProfilingInfoMask a) { - return KernelProfilingInfoMask(~unsigned(a)); -} -constexpr RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { - return RayFlagsMask(unsigned(a) | unsigned(b)); -} -constexpr RayFlagsMask operator&(RayFlagsMask a, RayFlagsMask b) { - return RayFlagsMask(unsigned(a) & unsigned(b)); -} -constexpr RayFlagsMask operator^(RayFlagsMask a, RayFlagsMask b) { - return RayFlagsMask(unsigned(a) ^ unsigned(b)); -} -constexpr RayFlagsMask operator~(RayFlagsMask a) { - return RayFlagsMask(~unsigned(a)); -} -constexpr FragmentShadingRateMask operator|(FragmentShadingRateMask a, - FragmentShadingRateMask b) { - return FragmentShadingRateMask(unsigned(a) | unsigned(b)); -} -constexpr FragmentShadingRateMask operator&(FragmentShadingRateMask a, - FragmentShadingRateMask b) { - return FragmentShadingRateMask(unsigned(a) & unsigned(b)); -} -constexpr FragmentShadingRateMask operator^(FragmentShadingRateMask a, - FragmentShadingRateMask b) { - return FragmentShadingRateMask(unsigned(a) ^ unsigned(b)); -} -constexpr FragmentShadingRateMask operator~(FragmentShadingRateMask a) { - return FragmentShadingRateMask(~unsigned(a)); -} - -} // end namespace spv - -#endif // #ifndef spirv_HPP diff --git a/hw/amdgpu/shader/CMakeLists.txt b/hw/amdgpu/shader/CMakeLists.txt deleted file mode 100644 index 9ff008e..0000000 --- a/hw/amdgpu/shader/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -project(libamdgpu-shader) -set(PROJECT_PATH amdgpu/shader) - -set(SRC - src/cf.cpp - src/scf.cpp - src/CfBuilder.cpp - src/Converter.cpp - src/ConverterContext.cpp - src/Fragment.cpp - src/Function.cpp - src/Instruction.cpp - src/RegisterState.cpp - src/TypeId.cpp -) - -add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) -target_link_libraries(${PROJECT_NAME} PUBLIC spirv amdgpu::base spirv-cross-core) -target_include_directories(${PROJECT_NAME} PUBLIC include PRIVATE include/${PROJECT_PATH}) -set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "") -add_library(amdgpu::shader ALIAS ${PROJECT_NAME}) -set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp b/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp deleted file mode 100644 index 4215535..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/AccessOp.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -namespace amdgpu::shader { -enum class AccessOp { None = 0, Load = 1 << 0, Store = 1 << 1 }; - -constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) { - return static_cast(static_cast(lhs) | static_cast(rhs)); -} -constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) { - return static_cast(static_cast(lhs) & static_cast(rhs)); -} -constexpr AccessOp operator~(AccessOp rhs) { - return static_cast(~static_cast(rhs)); -} -constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) { - return ((lhs = lhs | rhs)); -} -constexpr AccessOp &operator&=(AccessOp &lhs, AccessOp rhs) { - return ((lhs = lhs & rhs)); -} -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp b/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp deleted file mode 100644 index 09dc226..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/BufferKind.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -namespace amdgpu::shader { -enum class BufferKind { VBuffer, TBuffer }; -} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp b/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp deleted file mode 100644 index 92ad609..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/CfBuilder.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once -#include "cf.hpp" -#include - -namespace amdgpu::shader { -cf::BasicBlock *buildCf(cf::Context &ctxt, RemoteMemory memory, - std::uint64_t entryPoint); -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp deleted file mode 100644 index ee8a966..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Converter.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "AccessOp.hpp" -#include "Stage.hpp" - -#include -#include - -#include -#include -#include - -namespace amdgpu::shader { -struct Shader { - enum class UniformKind { Buffer, Sampler, StorageImage, Image }; - - struct UniformInfo { - std::uint32_t binding; - std::uint32_t buffer[8]; - UniformKind kind; - AccessOp accessOp; - }; - - std::vector uniforms; - std::vector spirv; -}; - -Shader convert(RemoteMemory memory, Stage stage, std::uint64_t entry, - std::span userSpgrs, std::uint32_t dimX, - std::uint32_t dimY, std::uint32_t dimZ, - util::MemoryAreaTable<> &dependencies); -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp deleted file mode 100644 index c492926..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp +++ /dev/null @@ -1,267 +0,0 @@ -#pragma once - -#include "Fragment.hpp" -#include "Function.hpp" -#include "Stage.hpp" -#include "TypeId.hpp" -#include "Uniform.hpp" -#include "util/area.hpp" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace amdgpu::shader { -/* -struct MaterializedFunction { - spirv::Function function; - spirv::FunctionType type; - spirv::Type returnType; - - std::vector> args; - std::vector> results; -}; -*/ - -class ConverterContext { - Stage mStage; - RemoteMemory mMemory; - spirv::IdGenerator mGenerator; - spirv::SpirvBuilder mBuilder{mGenerator, 1024}; - static constexpr auto kGenericTypesCount = - static_cast(TypeId::Void) + 1; - spirv::Type mTypes[kGenericTypesCount]; - spirv::PointerType mPtrTypes[13][kGenericTypesCount]; - spirv::RuntimeArrayType mRuntimeArrayTypes[kGenericTypesCount]; - spirv::VariableValue mThreadId; - spirv::VariableValue mWorkgroupId; - spirv::VariableValue mLocalInvocationId; - spirv::VariableValue mPerVertex; - spirv::VariableValue mFragCoord; - std::vector mInterfaces; - std::map mIns; - std::map mOuts; - - std::map mConstantFloat32Map; - std::map mConstantUint32Map; - std::map mConstantSint32Map; - std::map mConstantUint64Map; - - struct FunctionType { - spirv::Type resultType; - std::vector params; - spirv::FunctionType id; - }; - - std::vector mFunctionTypes; - - struct StructTypeEntry { - spirv::StructType id; - std::vector members; - spirv::PointerType ptrTypes[13]; - - bool match(std::span other) { - if (members.size() != other.size()) { - return false; - } - - for (std::size_t i = 0; i < other.size(); ++i) { - if (members[i] != other[i]) { - return false; - } - } - - return true; - } - }; - - std::vector mStructTypes; - - std::forward_list mFragments; - std::forward_list mFunctions; - - spirv::ConstantBool mTrue; - spirv::ConstantBool mFalse; - - std::vector mUniforms; - spirv::ExtInstSet mGlslStd450; - spirv::Function mDiscardFn; - -public: - util::MemoryAreaTable<> *dependencies = nullptr; - - ConverterContext(RemoteMemory memory, Stage stage, - util::MemoryAreaTable<> *dependencies) - : mStage(stage), mMemory(memory), dependencies(dependencies) { - mGlslStd450 = mBuilder.createExtInstImport("GLSL.std.450"); - } - - const decltype(mInterfaces) &getInterfaces() const { return mInterfaces; } - - spirv::SpirvBuilder &getBuilder() { return mBuilder; } - RemoteMemory getMemory() const { return mMemory; } - spirv::ExtInstSet getGlslStd450() const { return mGlslStd450; } - std::optional getTypeIdOf(spirv::Type type) const; - - spirv::StructType findStructType(std::span members); - spirv::StructType getStructType(std::span members); - spirv::PointerType getStructPointerType(spv::StorageClass storageClass, - spirv::StructType structType); - spirv::Type getType(TypeId id); - - spirv::PointerType getPointerType(spv::StorageClass storageClass, TypeId id) { - assert(static_cast(storageClass) < 13); - auto &type = mPtrTypes[static_cast(storageClass)] - [static_cast(id)]; - - if (!type) { - type = mBuilder.createTypePointer(storageClass, getType(id)); - } - - return type; - } - - spirv::RuntimeArrayType getRuntimeArrayType(TypeId id); - - spirv::UIntType getUInt32Type() { - return spirv::cast(getType(TypeId::UInt32)); - } - spirv::UIntType getUInt64Type() { - return spirv::cast(getType(TypeId::UInt64)); - } - spirv::UIntType getUInt8Type() { - return spirv::cast(getType(TypeId::UInt8)); - } - - spirv::VectorOfType getUint32x2Type() { - return spirv::cast>( - getType(TypeId::UInt32x2)); - } - - spirv::VectorOfType getUint32x3Type() { - return spirv::cast>( - getType(TypeId::UInt32x3)); - } - - spirv::VectorOfType getUint32x4Type() { - return spirv::cast>( - getType(TypeId::UInt32x4)); - } - - spirv::ArrayOfType getArrayUint32x8Type() { - return spirv::cast>( - getType(TypeId::ArrayUInt32x8)); - } - - spirv::ArrayOfType getArrayUint32x16Type() { - return spirv::cast>( - getType(TypeId::ArrayUInt32x16)); - } - - spirv::SIntType getSint32Type() { - return spirv::cast(getType(TypeId::SInt32)); - } - spirv::SIntType getSint64Type() { - return spirv::cast(getType(TypeId::SInt64)); - } - - spirv::FloatType getFloat16Type() { - return spirv::cast(getType(TypeId::Float16)); - } - spirv::FloatType getFloat32Type() { - return spirv::cast(getType(TypeId::Float32)); - } - - spirv::VectorOfType getFloat32x4Type() { - return spirv::cast>( - getType(TypeId::Float32x4)); - } - - spirv::VectorOfType getFloat32x3Type() { - return spirv::cast>( - getType(TypeId::Float32x3)); - } - - spirv::VectorOfType getFloat32x2Type() { - return spirv::cast>( - getType(TypeId::Float32x2)); - } - - spirv::BoolType getBoolType() { - return spirv::cast(getType(TypeId::Bool)); - } - - spirv::VoidType getVoidType() { - return spirv::cast(getType(TypeId::Void)); - } - - spirv::ConstantBool getTrue() { - if (!mTrue) { - mTrue = mBuilder.createConstantTrue(getBoolType()); - } - return mTrue; - } - spirv::ConstantBool getFalse() { - if (!mFalse) { - mFalse = mBuilder.createConstantFalse(getBoolType()); - } - return mFalse; - } - - spirv::ConstantUInt getUInt64(std::uint64_t value); - spirv::ConstantUInt getUInt32(std::uint32_t value); - spirv::ConstantSInt getSInt32(std::uint32_t value); - spirv::ConstantFloat getFloat32Raw(std::uint32_t value); - - spirv::ConstantFloat getFloat32(float id) { - return getFloat32Raw(std::bit_cast(id)); - } - - spirv::SamplerType getSamplerType() { - return spirv::cast(getType(TypeId::Sampler)); - } - spirv::ImageType getImage2DType() { - return spirv::cast(getType(TypeId::Image2D)); - } - spirv::ImageType getStorageImage2DType() { - return spirv::cast(getType(TypeId::StorageImage2D)); - } - spirv::SampledImageType getSampledImage2DType() { - return spirv::cast( - getType(TypeId::SampledImage2D)); - } - - UniformInfo *createStorageBuffer(TypeId type); - UniformInfo *getOrCreateStorageBuffer(std::uint32_t *vbuffer, TypeId type); - UniformInfo *getOrCreateUniformConstant(std::uint32_t *buffer, - std::size_t size, TypeId type); - spirv::VariableValue getThreadId(); - spirv::VariableValue getWorkgroupId(); - spirv::VariableValue getLocalInvocationId(); - spirv::VariableValue getPerVertex(); - spirv::VariableValue getFragCoord(); - spirv::VariableValue getIn(unsigned location); - spirv::VariableValue getOut(unsigned location); - - spirv::Function getDiscardFn(); - - std::optional findUint32Value(spirv::Value id) const; - std::optional findSint32Value(spirv::Value id) const; - std::optional findFloat32Value(spirv::Value id) const; - spirv::FunctionType getFunctionType(spirv::Type resultType, - std::span params); - - Function *createFunction(std::size_t expectedSize); - Fragment *createFragment(std::size_t expectedSize); - - std::vector &getUniforms() { return mUniforms; } -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp deleted file mode 100644 index 8a52f26..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once -#include "AccessOp.hpp" -#include "RegisterId.hpp" -#include "RegisterState.hpp" -#include "TypeId.hpp" - -#include -#include -#include -#include - -namespace amdgpu::shader { -enum class OperandGetFlags { None, PreserveType = 1 << 0 }; - -struct Function; -class ConverterContext; - -struct Fragment { - ConverterContext *context = nullptr; - Function *function = nullptr; - spirv::Block entryBlockId; - spirv::BlockBuilder builder; - RegisterState *registers = nullptr; - - std::set values; - std::set outputs; - - std::vector predecessors; - std::uint64_t jumpAddress = 0; - spirv::BoolValue branchCondition; - bool hasTerminator = false; - - void appendBranch(Fragment &other) { other.predecessors.push_back(this); } - - void injectValuesFromPreds(); - - // std::optional findInput(spirv::Value value); - // Value addInput(RegisterId id, spirv::Type type); - spirv::SamplerValue createSampler(RegisterId base); - spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled, - AccessOp access); // TODO: params - Value createCompositeExtract(Value composite, std::uint32_t member); - Value getOperand(RegisterId id, TypeId type, - OperandGetFlags flags = OperandGetFlags::None); - void setOperand(RegisterId id, Value value); - void setVcc(Value value); - void setScc(Value value); - spirv::BoolValue getScc(); - spirv::Value createBitcast(spirv::Type to, spirv::Type from, - spirv::Value value); - - Value getScalarOperand(int id, TypeId type, - OperandGetFlags flags = OperandGetFlags::None) { - return getOperand(RegisterId::Scalar(id), type, flags); - } - Value getVectorOperand(int id, TypeId type, - OperandGetFlags flags = OperandGetFlags::None) { - return getOperand(RegisterId::Vector(id), type, flags); - } - Value getAttrOperand(int id, TypeId type, - OperandGetFlags flags = OperandGetFlags::None) { - return getOperand(RegisterId::Attr(id), type, flags); - } - Value getVccLo() { return getOperand(RegisterId::VccLo, TypeId::UInt32); } - Value getVccHi() { return getOperand(RegisterId::VccHi, TypeId::UInt32); } - Value getExecLo() { return getOperand(RegisterId::ExecLo, TypeId::UInt32); } - Value getExecHi() { return getOperand(RegisterId::ExecHi, TypeId::UInt32); } - void setScalarOperand(int id, Value value) { - setOperand(RegisterId::Scalar(id), value); - } - void setVectorOperand(int id, Value value) { - setOperand(RegisterId::Vector(id), value); - } - void setExportTarget(int id, Value value) { - setOperand(RegisterId::Export(id), value); - } - // void createCallTo(MaterializedFunction *other); - void convert(std::uint64_t size); - -private: - Value getRegister(RegisterId id); - Value getRegister(RegisterId id, spirv::Type type); - void setRegister(RegisterId id, Value value); -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp b/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp deleted file mode 100644 index 48ad5a0..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/FragmentTerminator.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -namespace amdgpu::shader { -enum class FragmentTerminator { - None, - EndProgram, - CallToReg, - BranchToReg, - Branch, -}; -} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp deleted file mode 100644 index 80d8256..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#include "Fragment.hpp" -#include "RegisterId.hpp" -#include "Stage.hpp" -#include "spirv/spirv-builder.hpp" -#include - -namespace amdgpu::shader { -class ConverterContext; - -struct Function { - ConverterContext *context = nullptr; - Stage stage = Stage::None; - std::span userSgprs; - std::span userVgprs; - Fragment entryFragment; - Fragment exitFragment; - std::map inputs; - spirv::FunctionBuilder builder; - std::vector fragments; - - Value getInput(RegisterId id); - Value createInput(RegisterId id); - void createExport(spirv::BlockBuilder &builder, unsigned index, Value value); - spirv::Type getResultType(); - spirv::FunctionType getFunctionType(); - - Fragment *createFragment() { - auto result = createDetachedFragment(); - appendFragment(result); - return result; - } - - Fragment *createDetachedFragment(); - void appendFragment(Fragment *fragment) { fragments.push_back(fragment); } - - void insertReturn(); -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp deleted file mode 100644 index 2d73e55..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Instruction.hpp +++ /dev/null @@ -1,1973 +0,0 @@ -#pragma once - -#include -#include - -namespace amdgpu::shader { -inline constexpr std::uint32_t genMask(std::uint32_t offset, - std::uint32_t bitCount) { - return ((1u << bitCount) - 1u) << offset; -} - -inline constexpr std::uint32_t getMaskEnd(std::uint32_t mask) { - return 32 - std::countl_zero(mask); -} - -inline constexpr std::uint32_t fetchMaskedValue(std::uint32_t hex, - std::uint32_t mask) { - return (hex & mask) >> std::countr_zero(mask); -} - -enum SurfaceFormat { - kSurfaceFormatInvalid = 0x00000000, - kSurfaceFormat8 = 0x00000001, - kSurfaceFormat16 = 0x00000002, - kSurfaceFormat8_8 = 0x00000003, - kSurfaceFormat32 = 0x00000004, - kSurfaceFormat16_16 = 0x00000005, - kSurfaceFormat10_11_11 = 0x00000006, - kSurfaceFormat11_11_10 = 0x00000007, - kSurfaceFormat10_10_10_2 = 0x00000008, - kSurfaceFormat2_10_10_10 = 0x00000009, - kSurfaceFormat8_8_8_8 = 0x0000000a, - kSurfaceFormat32_32 = 0x0000000b, - kSurfaceFormat16_16_16_16 = 0x0000000c, - kSurfaceFormat32_32_32 = 0x0000000d, - kSurfaceFormat32_32_32_32 = 0x0000000e, -}; -enum TextureChannelType { - kTextureChannelTypeUNorm = 0x00000000, - kTextureChannelTypeSNorm = 0x00000001, - kTextureChannelTypeUScaled = 0x00000002, - kTextureChannelTypeSScaled = 0x00000003, - kTextureChannelTypeUInt = 0x00000004, - kTextureChannelTypeSInt = 0x00000005, - kTextureChannelTypeSNormNoZero = 0x00000006, - kTextureChannelTypeFloat = 0x00000007, -}; - -inline int getScalarInstSize(int id) { return id == 255 ? 1 : 0; } - -struct Sop1 { - enum class Op { - S_MOV_B32 = 3, - S_MOV_B64, - S_CMOV_B32, - S_CMOV_B64, - S_NOT_B32, - S_NOT_B64, - S_WQM_B32, - S_WQM_B64, - S_BREV_B32, - S_BREV_B64, - S_BCNT0_I32_B32, - S_BCNT0_I32_B64, - S_BCNT1_I32_B32, - S_BCNT1_I32_B64, - S_FF0_I32_B32, - S_FF0_I32_B64, - S_FF1_I32_B32, - S_FF1_I32_B64, - S_FLBIT_I32_B32, - S_FLBIT_I32_B64, - S_FLBIT_I32, - S_FLBIT_I32_I64, - S_SEXT_I32_I8, - S_SEXT_I32_I16, - S_BITSET0_B32, - S_BITSET0_B64, - S_BITSET1_B32, - S_BITSET1_B64, - S_GETPC_B64, - S_SETPC_B64, - S_SWAPPC_B64, - S_RFE_B64, - S_AND_SAVEEXEC_B64 = 36, - S_OR_SAVEEXEC_B64, - S_XOR_SAVEEXEC_B64, - S_ANDN2_SAVEEXEC_B64, - S_ORN2_SAVEEXEC_B64, - S_NAND_SAVEEXEC_B64, - S_NOR_SAVEEXEC_B64, - S_XNOR_SAVEEXEC_B64, - S_QUADMASK_B32, - S_QUADMASK_B64, - S_MOVRELS_B32, - S_MOVRELS_B64, - S_MOVRELD_B32, - S_MOVRELD_B64, - S_CBRANCH_JOIN, - S_ABS_I32 = 52, - S_MOV_FED_B32, - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto ssrc0Mask = genMask(0, 8); - static constexpr auto opMask = genMask(getMaskEnd(ssrc0Mask), 8); - static constexpr auto sdstMask = genMask(getMaskEnd(opMask), 7); - - const std::uint32_t *inst; - - const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); - - Sop1(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize + getScalarInstSize(ssrc0); } - - void dump() const; -}; - -struct Sopk { - enum class Op { - S_MOVK_I32, - S_CMOVK_I32 = 2, - S_CMPK_EQ_I32, - S_CMPK_LG_I32, - S_CMPK_GT_I32, - S_CMPK_GE_I32, - S_CMPK_LT_I32, - S_CMPK_LE_I32, - S_CMPK_EQ_U32, - S_CMPK_LG_U32, - S_CMPK_GT_U32, - S_CMPK_GE_U32, - S_CMPK_LT_U32, - S_CMPK_LE_U32, - S_ADDK_I32, - S_MULK_I32, - S_CBRANCH_I_FORK, - S_GETREG_B32, - S_SETREG_B32, - S_SETREG_IMM - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto simmMask = genMask(0, 16); - static constexpr auto sdstMask = genMask(getMaskEnd(simmMask), 7); - static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 5); - - const std::uint32_t *inst; - - const std::int16_t simm = (std::int16_t)fetchMaskedValue(inst[0], simmMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); - - Sopk(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Sopc { - enum class Op { - S_CMP_EQ_I32, - S_CMP_LG_I32, - S_CMP_GT_I32, - S_CMP_GE_I32, - S_CMP_LT_I32, - S_CMP_LE_I32, - S_CMP_EQ_U32, - S_CMP_LG_U32, - S_CMP_GT_U32, - S_CMP_GE_U32, - S_CMP_LT_U32, - S_CMP_LE_U32, - S_BITCMP0_B32, - S_BITCMP1_B32, - S_BITCMP0_B64, - S_BITCMP1_B64, - S_SETVSKIP, - S_ILLEGALD - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto ssrc0Mask = genMask(0, 8); - static constexpr auto ssrc1Mask = genMask(getMaskEnd(ssrc0Mask), 8); - static constexpr auto opMask = genMask(getMaskEnd(ssrc1Mask), 7); - - const std::uint32_t *inst; - - const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); - const std::uint32_t ssrc1 = fetchMaskedValue(inst[0], ssrc1Mask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Sopc(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize + getScalarInstSize(ssrc0); } - - void dump() const; -}; - -struct Sop2 { - enum class Op { - S_ADD_U32, - S_SUB_U32, - S_ADD_I32, - S_SUB_I32, - S_ADDC_U32, - S_SUBB_U32, - S_MIN_I32, - S_MIN_U32, - S_MAX_I32, - S_MAX_U32, - S_CSELECT_B32, - S_CSELECT_B64, - S_AND_B32 = 14, - S_AND_B64, - S_OR_B32, - S_OR_B64, - S_XOR_B32, - S_XOR_B64, - S_ANDN2_B32, - S_ANDN2_B64, - S_ORN2_B32, - S_ORN2_B64, - S_NAND_B32, - S_NAND_B64, - S_NOR_B32, - S_NOR_B64, - S_XNOR_B32, - S_XNOR_B64, - S_LSHL_B32, - S_LSHL_B64, - S_LSHR_B32, - S_LSHR_B64, - S_ASHR_I32, - S_ASHR_I64, - S_BFM_B32, - S_BFM_B64, - S_MUL_I32, - S_BFE_U32, - S_BFE_I32, - S_BFE_U64, - S_BFE_I64, - S_CBRANCH_G_FORK, - S_ABSDIFF_I32, - S_LSHL1_ADD_U32, - S_LSHL2_ADD_U32, - S_LSHL3_ADD_U32, - S_LSHL4_ADD_U32, - S_PACK_LL_B32_B16, - S_PACK_LH_B32_B16, - S_PACK_HH_B32_B16, - S_MUL_HI_U32, - S_MUL_HI_I32, - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto ssrc0Mask = genMask(0, 8); - static constexpr auto ssrc1Mask = genMask(getMaskEnd(ssrc0Mask), 8); - static constexpr auto sdstMask = genMask(getMaskEnd(ssrc1Mask), 7); - static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 7); - - const std::uint32_t *inst; - const std::uint32_t ssrc0 = fetchMaskedValue(inst[0], ssrc0Mask); - const std::uint32_t ssrc1 = fetchMaskedValue(inst[0], ssrc1Mask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); - - Sop2(const std::uint32_t *inst) : inst(inst) {} - - int size() const { - return kMinInstSize + getScalarInstSize(ssrc0) + getScalarInstSize(ssrc1); - } - - void dump() const; -}; - -struct Sopp { - enum class Op { - S_NOP, - S_ENDPGM, - S_BRANCH, - S_CBRANCH_SCC0 = 4, - S_CBRANCH_SCC1, - S_CBRANCH_VCCZ, - S_CBRANCH_VCCNZ, - S_CBRANCH_EXECZ, - S_CBRANCH_EXECNZ, - S_BARRIER, - S_WAITCNT = 12, - S_SETHALT, - S_SLEEP, - S_SETPRIO, - S_SENDMSG, - S_SENDMSGHALT, - S_TRAP, - S_ICACHE_INV, - S_INCPERFLEVEL, - S_DECPERFLEVEL, - S_TTRACEDATA, - S_CBRANCH_CDBGSYS = 23, - S_CBRANCH_CDBGUSER = 24, - S_CBRANCH_CDBGSYS_OR_USER = 25, - S_CBRANCH_CDBGSYS_AND_USER = 26, - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto simmMask = genMask(0, 16); - static constexpr auto opMask = genMask(getMaskEnd(simmMask), 7); - - const std::uint32_t *inst; - const std::int16_t simm = (std::int16_t)fetchMaskedValue(inst[0], simmMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Sopp(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Vop1 { - enum class Op { - V_NOP, - V_MOV_B32, - V_READFIRSTLANE_B32, - V_CVT_I32_F64, - V_CVT_F64_I32, - V_CVT_F32_I32, - V_CVT_F32_U32, - V_CVT_U32_F32, - V_CVT_I32_F32, - V_MOV_FED_B32, - V_CVT_F16_F32, - V_CVT_F32_F16, - V_CVT_RPI_I32_F32, - V_CVT_FLR_I32_F32, - V_CVT_OFF_F32_I4, - V_CVT_F32_F64, - V_CVT_F64_F32, - V_CVT_F32_UBYTE0, - V_CVT_F32_UBYTE1, - V_CVT_F32_UBYTE2, - V_CVT_F32_UBYTE3, - V_CVT_U32_F64, - V_CVT_F64_U32, - V_FRACT_F32 = 32, - V_TRUNC_F32, - V_CEIL_F32, - V_RNDNE_F32, - V_FLOOR_F32, - V_EXP_F32, - V_LOG_CLAMP_F32, - V_LOG_F32, - V_RCP_CLAMP_F32, - V_RCP_LEGACY_F32, - V_RCP_F32, - V_RCP_IFLAG_F32, - V_RSQ_CLAMP_F32, - V_RSQ_LEGACY_F32, - V_RSQ_F32, - V_RCP_F64, - V_RCP_CLAMP_F64, - V_RSQ_F64, - V_RSQ_CLAMP_F64, - V_SQRT_F32, - V_SQRT_F64, - V_SIN_F32, - V_COS_F32, - V_NOT_B32, - V_BFREV_B32, - V_FFBH_U32, - V_FFBL_B32, - V_FFBH_I32, - V_FREXP_EXP_I32_F64, - V_FREXP_MANT_F64, - V_FRACT_F64, - V_FREXP_EXP_I32_F32, - V_FREXP_MANT_F32, - V_CLREXCP, - V_MOVRELD_B32, - V_MOVRELS_B32, - V_MOVRELSD_B32, - V_CVT_F16_U16 = 80, - V_CVT_F16_I16, - V_CVT_U16_F16, - V_CVT_I16_F16, - V_RCP_F16, - V_SQRT_F16, - V_RSQ_F16, - V_LOG_F16, - V_EXP_F16, - V_FREXP_MANT_F16, - V_FREXP_EXP_I16_F16, - V_FLOOR_F16, - V_CEIL_F16, - V_TRUNC_F16, - V_RNDNE_F16, - V_FRACT_F16, - V_SIN_F16, - V_COS_F16, - V_SAT_PK_U8_I16, - V_CVT_NORM_I16_F16, - V_CVT_NORM_U16_F16, - V_SWAP_B32, - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto src0Mask = genMask(0, 9); - static constexpr auto opMask = genMask(getMaskEnd(src0Mask), 8); - static constexpr auto vdstMask = genMask(getMaskEnd(opMask), 8); - - const std::uint32_t *inst; - const std::uint32_t src0 = fetchMaskedValue(inst[0], src0Mask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); - - int size() const { return kMinInstSize + getScalarInstSize(src0); } - - Vop1(const std::uint32_t *inst) : inst(inst) {} - - void dump() const; -}; - -struct Vop2 { - enum class Op { - V_CNDMASK_B32, - V_READLANE_B32, - V_WRITELANE_B32, - V_ADD_F32, - V_SUB_F32, - V_SUBREV_F32, - V_MAC_LEGACY_F32, - V_MUL_LEGACY_F32, - V_MUL_F32, - V_MUL_I32_I24, - V_MUL_HI_I32_I24, - V_MUL_U32_U24, - V_MUL_HI_U32_U24, - V_MIN_LEGACY_F32, - V_MAX_LEGACY_F32, - V_MIN_F32, - V_MAX_F32, - V_MIN_I32, - V_MAX_I32, - V_MIN_U32, - V_MAX_U32, - V_LSHR_B32, - V_LSHRREV_B32, - V_ASHR_I32, - V_ASHRREV_I32, - V_LSHL_B32, - V_LSHLREV_B32, - V_AND_B32, - V_OR_B32, - V_XOR_B32, - V_BFM_B32, - V_MAC_F32, - V_MADMK_F32, - V_MADAK_F32, - V_BCNT_U32_B32, - V_MBCNT_LO_U32_B32, - V_MBCNT_HI_U32_B32, - V_ADD_I32, - V_SUB_I32, - V_SUBREV_I32, - V_ADDC_U32, - V_SUBB_U32, - V_SUBBREV_U32, - V_LDEXP_F32, - V_CVT_PKACCUM_U8_F32, - V_CVT_PKNORM_I16_F32, - V_CVT_PKNORM_U16_F32, - V_CVT_PKRTZ_F16_F32, - V_CVT_PK_U16_U32, - V_CVT_PK_I16_I32, - }; - - static constexpr int kMinInstSize = 1; - static constexpr auto src0Mask = genMask(0, 9); - static constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); - static constexpr auto vdstMask = genMask(getMaskEnd(vsrc1Mask), 8); - static constexpr auto opMask = genMask(getMaskEnd(vdstMask), 6); - - const std::uint32_t *inst; - const std::uint32_t src0 = fetchMaskedValue(inst[0], src0Mask); - const std::uint32_t vsrc1 = fetchMaskedValue(inst[0], vsrc1Mask); - const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Vop2(const std::uint32_t *inst) : inst(inst) {} - - int size() const { - int result = kMinInstSize + getScalarInstSize(src0); - - if (op == Vop2::Op::V_MADMK_F32 || op == Vop2::Op::V_MADAK_F32) { - result += 1; - } - - return result; - } - void dump() const; -}; - -struct Vop3 { - enum class Op { - V3_CMP_F_F32, - V3_CMP_LT_F32, - V3_CMP_EQ_F32, - V3_CMP_LE_F32, - V3_CMP_GT_F32, - V3_CMP_LG_F32, - V3_CMP_GE_F32, - V3_CMP_O_F32, - V3_CMP_U_F32, - V3_CMP_NGE_F32, - V3_CMP_NLG_F32, - V3_CMP_NGT_F32, - V3_CMP_NLE_F32, - V3_CMP_NEQ_F32, - V3_CMP_NLT_F32, - V3_CMP_TRU_F32, - V3_CMPX_F_F32, - V3_CMPX_LT_F32, - V3_CMPX_EQ_F32, - V3_CMPX_LE_F32, - V3_CMPX_GT_F32, - V3_CMPX_LG_F32, - V3_CMPX_GE_F32, - V3_CMPX_O_F32, - V3_CMPX_U_F32, - V3_CMPX_NGE_F32, - V3_CMPX_NLG_F32, - V3_CMPX_NGT_F32, - V3_CMPX_NLE_F32, - V3_CMPX_NEQ_F32, - V3_CMPX_NLT_F32, - V3_CMPX_TRU_F32, - V3_CMP_F_F64, - V3_CMP_LT_F64, - V3_CMP_EQ_F64, - V3_CMP_LE_F64, - V3_CMP_GT_F64, - V3_CMP_LG_F64, - V3_CMP_GE_F64, - V3_CMP_O_F64, - V3_CMP_U_F64, - V3_CMP_NGE_F64, - V3_CMP_NLG_F64, - V3_CMP_NGT_F64, - V3_CMP_NLE_F64, - V3_CMP_NEQ_F64, - V3_CMP_NLT_F64, - V3_CMP_TRU_F64, - V3_CMPX_F_F64, - V3_CMPX_LT_F64, - V3_CMPX_EQ_F64, - V3_CMPX_LE_F64, - V3_CMPX_GT_F64, - V3_CMPX_LG_F64, - V3_CMPX_GE_F64, - V3_CMPX_O_F64, - V3_CMPX_U_F64, - V3_CMPX_NGE_F64, - V3_CMPX_NLG_F64, - V3_CMPX_NGT_F64, - V3_CMPX_NLE_F64, - V3_CMPX_NEQ_F64, - V3_CMPX_NLT_F64, - V3_CMPX_TRU_F64, - V3_CMPS_F_F32, - V3_CMPS_LT_F32, - V3_CMPS_EQ_F32, - V3_CMPS_LE_F32, - V3_CMPS_GT_F32, - V3_CMPS_LG_F32, - V3_CMPS_GE_F32, - V3_CMPS_O_F32, - V3_CMPS_U_F32, - V3_CMPS_NGE_F32, - V3_CMPS_NLG_F32, - V3_CMPS_NGT_F32, - V3_CMPS_NLE_F32, - V3_CMPS_NEQ_F32, - V3_CMPS_NLT_F32, - V3_CMPS_TRU_F32, - V3_CMPSX_F_F32, - V3_CMPSX_LT_F32, - V3_CMPSX_EQ_F32, - V3_CMPSX_LE_F32, - V3_CMPSX_GT_F32, - V3_CMPSX_LG_F32, - V3_CMPSX_GE_F32, - V3_CMPSX_O_F32, - V3_CMPSX_U_F32, - V3_CMPSX_NGE_F32, - V3_CMPSX_NLG_F32, - V3_CMPSX_NGT_F32, - V3_CMPSX_NLE_F32, - V3_CMPSX_NEQ_F32, - V3_CMPSX_NLT_F32, - V3_CMPSX_TRU_F32, - V3_CMPS_F_F64, - V3_CMPS_LT_F64, - V3_CMPS_EQ_F64, - V3_CMPS_LE_F64, - V3_CMPS_GT_F64, - V3_CMPS_LG_F64, - V3_CMPS_GE_F64, - V3_CMPS_O_F64, - V3_CMPS_U_F64, - V3_CMPS_NGE_F64, - V3_CMPS_NLG_F64, - V3_CMPS_NGT_F64, - V3_CMPS_NLE_F64, - V3_CMPS_NEQ_F64, - V3_CMPS_NLT_F64, - V3_CMPS_TRU_F64, - V3_CMPSX_F_F64, - V3_CMPSX_LT_F64, - V3_CMPSX_EQ_F64, - V3_CMPSX_LE_F64, - V3_CMPSX_GT_F64, - V3_CMPSX_LG_F64, - V3_CMPSX_GE_F64, - V3_CMPSX_O_F64, - V3_CMPSX_U_F64, - V3_CMPSX_NGE_F64, - V3_CMPSX_NLG_F64, - V3_CMPSX_NGT_F64, - V3_CMPSX_NLE_F64, - V3_CMPSX_NEQ_F64, - V3_CMPSX_NLT_F64, - V3_CMPSX_TRU_F64, - V3_CMP_F_I32, - V3_CMP_LT_I32, - V3_CMP_EQ_I32, - V3_CMP_LE_I32, - V3_CMP_GT_I32, - V3_CMP_NE_I32, - V3_CMP_GE_I32, - V3_CMP_T_I32, - V3_CMP_CLASS_F32, - V3_CMP_LT_I16, - V3_CMP_EQ_I16, - V3_CMP_LE_I16, - V3_CMP_GT_I16, - V3_CMP_NE_I16, - V3_CMP_GE_I16, - V3_CMP_CLASS_F16, - V3_CMPX_F_I32, - V3_CMPX_LT_I32, - V3_CMPX_EQ_I32, - V3_CMPX_LE_I32, - V3_CMPX_GT_I32, - V3_CMPX_NE_I32, - V3_CMPX_GE_I32, - V3_CMPX_T_I32, - V3_CMPX_CLASS_F32, - V3_CMPX_LT_I16, - V3_CMPX_EQ_I16, - V3_CMPX_LE_I16, - V3_CMPX_GT_I16, - V3_CMPX_NE_I16, - V3_CMPX_GE_I16, - V3_CMPX_CLASS_F16, - V3_CMP_F_I64, - V3_CMP_LT_I64, - V3_CMP_EQ_I64, - V3_CMP_LE_I64, - V3_CMP_GT_I64, - V3_CMP_NE_I64, - V3_CMP_GE_I64, - V3_CMP_T_I64, - V3_CMP_CLASS_F64, - V3_CMP_LT_U16, - V3_CMP_EQ_U16, - V3_CMP_LE_U16, - V3_CMP_GT_U16, - V3_CMP_NE_U16, - V3_CMP_GE_U16, - V3_CMPX_F_I64 = 176, - V3_CMPX_LT_I64, - V3_CMPX_EQ_I64, - V3_CMPX_LE_I64, - V3_CMPX_GT_I64, - V3_CMPX_NE_I64, - V3_CMPX_GE_I64, - V3_CMPX_T_I64, - V3_CMPX_CLASS_F64, - V3_CMPX_LT_U16, - V3_CMPX_EQ_U16, - V3_CMPX_LE_U16, - V3_CMPX_GT_U16, - V3_CMPX_NE_U16, - V3_CMPX_GE_U16, - V3_CMP_F_U32 = 192, - V3_CMP_LT_U32, - V3_CMP_EQ_U32, - V3_CMP_LE_U32, - V3_CMP_GT_U32, - V3_CMP_NE_U32, - V3_CMP_GE_U32, - V3_CMP_T_U32, - V3_CMP_F_F16, - V3_CMP_LT_F16, - V3_CMP_EQ_F16, - V3_CMP_LE_F16, - V3_CMP_GT_F16, - V3_CMP_LG_F16, - V3_CMP_GE_F16, - V3_CMP_O_F16, - V3_CMPX_F_U32, - V3_CMPX_LT_U32, - V3_CMPX_EQ_U32, - V3_CMPX_LE_U32, - V3_CMPX_GT_U32, - V3_CMPX_NE_U32, - V3_CMPX_GE_U32, - V3_CMPX_T_U32, - V3_CMPX_F_F16, - V3_CMPX_LT_F16, - V3_CMPX_EQ_F16, - V3_CMPX_LE_F16, - V3_CMPX_GT_F16, - V3_CMPX_LG_F16, - V3_CMPX_GE_F16, - V3_CMPX_O_F16, - V3_CMP_F_U64, - V3_CMP_LT_U64, - V3_CMP_EQ_U64, - V3_CMP_LE_U64, - V3_CMP_GT_U64, - V3_CMP_NE_U64, - V3_CMP_GE_U64, - V3_CMP_T_U64, - V3_CMP_U_F16, - V3_CMP_NGE_F16, - V3_CMP_NLG_F16, - V3_CMP_NGT_F16, - V3_CMP_NLE_F16, - V3_CMP_NEQ_F16, - V3_CMP_NLT_F16, - V3_CMP_TRU_F16, - V3_CMPX_F_U64, - V3_CMPX_LT_U64, - V3_CMPX_EQ_U64, - V3_CMPX_LE_U64, - V3_CMPX_GT_U64, - V3_CMPX_NE_U64, - V3_CMPX_GE_U64, - V3_CMPX_T_U64, - V3_CNDMASK_B32 = 256, - V3_READLANE_B32, - V3_WRITELANE_B32, - V3_ADD_F32, - V3_SUB_F32, - V3_SUBREV_F32, - V3_MAC_LEGACY_F32, - V3_MUL_LEGACY_F32, - V3_MUL_F32, - V3_MUL_I32_I24, - V3_MUL_HI_I32_I24, - V3_MUL_U32_U24, - V3_MUL_HI_U32_U24, - V3_MIN_LEGACY_F32, - V3_MAX_LEGACY_F32, - V3_MIN_F32, - V3_MAX_F32, - V3_MIN_I32, - V3_MAX_I32, - V3_MIN_U32, - V3_MAX_U32, - V3_LSHR_B32, - V3_LSHRREV_B32, - V3_ASHR_I32, - V3_ASHRREV_I32, - V3_LSHL_B32, - V3_LSHLREV_B32, - V3_AND_B32, - V3_OR_B32, - V3_XOR_B32, - V3_BFM_B32, - V3_MAC_F32, - V3_MADMK_F32, - V3_MADAK_F32, - V3_BCNT_U32_B32, - V3_MBCNT_LO_U32_B32, - V3_MBCNT_HI_U32_B32, - V3_ADD_I32, - V3_SUB_I32, - V3_SUBREV_I32, - V3_ADDC_U32, - V3_SUBB_U32, - V3_SUBBREV_U32, - V3_LDEXP_F32, - V3_CVT_PKACCUM_U8_F32, - V3_CVT_PKNORM_I16_F32, - V3_CVT_PKNORM_U16_F32, - V3_CVT_PKRTZ_F16_F32, - V3_CVT_PK_U16_U32, - V3_CVT_PK_I16_I32, - V3_MAD_LEGACY_F32 = 320, - V3_MAD_F32, - V3_MAD_I32_I24, - V3_MAD_U32_U24, - V3_CUBEID_F32, - V3_CUBESC_F32, - V3_CUBETC_F32, - V3_CUBEMA_F32, - V3_BFE_U32, - V3_BFE_I32, - V3_BFI_B32, - V3_FMA_F32, - V3_FMA_F64, - V3_LERP_U8, - V3_ALIGNBIT_B32, - V3_ALIGNBYTE_B32, - V3_MULLIT_F32, - V3_MIN3_F32, - V3_MIN3_I32, - V3_MIN3_U32, - V3_MAX3_F32, - V3_MAX3_I32, - V3_MAX3_U32, - V3_MED3_F32, - V3_MED3_I32, - V3_MED3_U32, - V3_SAD_U8, - V3_SAD_HI_U8, - V3_SAD_U16, - V3_SAD_U32, - V3_CVT_PK_U8_F32, - V3_DIV_FIXUP_F32, - V3_DIV_FIXUP_F64, - V3_LSHL_B64, - V3_LSHR_B64, - V3_ASHR_I64, - V3_ADD_F64, - V3_MUL_F64, - V3_MIN_F64, - V3_MAX_F64, - V3_LDEXP_F64, - V3_MUL_LO_U32, - V3_MUL_HI_U32, - V3_MUL_LO_I32, - V3_MUL_HI_I32, - V3_DIV_SCALE_F32, - V3_DIV_SCALE_F64, - V3_DIV_FMAS_F32, - V3_DIV_FMAS_F64, - V3_MSAD_U8, - V3_QSAD_U8, - V3_MQSAD_U8, - V3_TRIG_PREOP_F64, - V3_NOP = 384, - V3_MOV_B32, - V3_READFIRSTLANE_B32, - V3_CVT_I32_F64, - V3_CVT_F64_I32, - V3_CVT_F32_I32, - V3_CVT_F32_U32, - V3_CVT_U32_F32, - V3_CVT_I32_F32, - V3_MOV_FED_B32, - V3_CVT_F16_F32, - V3_CVT_F32_F16, - V3_CVT_RPI_I32_F32, - V3_CVT_FLR_I32_F32, - V3_CVT_OFF_F32_I4, - V3_CVT_F32_F64, - V3_CVT_F64_F32, - V3_CVT_F32_UBYTE0, - V3_CVT_F32_UBYTE1, - V3_CVT_F32_UBYTE2, - V3_CVT_F32_UBYTE3, - V3_CVT_U32_F64, - V3_CVT_F64_U32, - V3_FRACT_F32 = 416, - V3_TRUNC_F32, - V3_CEIL_F32, - V3_RNDNE_F32, - V3_FLOOR_F32, - V3_EXP_F32, - V3_LOG_CLAMP_F32, - V3_LOG_F32, - V3_RCP_CLAMP_F32, - V3_RCP_LEGACY_F32, - V3_RCP_F32, - V3_RCP_IFLAG_F32, - V3_RSQ_CLAMP_F32, - V3_RSQ_LEGACY_F32, - V3_RSQ_F32, - V3_RCP_F64, - V3_RCP_CLAMP_F64, - V3_RSQ_F64, - V3_RSQ_CLAMP_F64, - V3_SQRT_F32, - V3_SQRT_F64, - V3_SIN_F32, - V3_COS_F32, - V3_NOT_B32, - V3_BFREV_B32, - V3_FFBH_U32, - V3_FFBL_B32, - V3_FFBH_I32, - V3_FREXP_EXP_I32_F64, - V3_FREXP_MANT_F64, - V3_FRACT_F64, - V3_FREXP_EXP_I32_F32, - V3_FREXP_MANT_F32, - V3_CLREXCP, - V3_MOVRELD_B32, - V3_MOVRELS_B32, - V3_MOVRELSD_B32, - }; - - static constexpr int kMinInstSize = 2; - static constexpr auto vdstMask = genMask(0, 8); - - static constexpr auto absMask = genMask(getMaskEnd(vdstMask), 3); - static constexpr auto abs0Mask = genMask(getMaskEnd(vdstMask), 1); - static constexpr auto abs1Mask = genMask(getMaskEnd(abs0Mask), 1); - static constexpr auto abs2Mask = genMask(getMaskEnd(abs1Mask), 1); - static constexpr auto clmpMask = genMask(getMaskEnd(absMask), 1); - - static constexpr auto sdstMask = genMask(getMaskEnd(vdstMask), 7); - - static_assert(getMaskEnd(clmpMask) + 5 == getMaskEnd(sdstMask) + 2); - - static constexpr auto opMask = genMask(getMaskEnd(clmpMask) + 5, 9); - - static constexpr auto src0Mask = genMask(0, 9); - static constexpr auto src1Mask = genMask(getMaskEnd(src0Mask), 9); - static constexpr auto src2Mask = genMask(getMaskEnd(src1Mask), 9); - static constexpr auto omodMask = genMask(getMaskEnd(src2Mask), 2); - static constexpr auto negMask = genMask(getMaskEnd(omodMask), 3); - static constexpr auto neg0Mask = genMask(getMaskEnd(omodMask), 1); - static constexpr auto neg1Mask = genMask(getMaskEnd(neg0Mask), 1); - static constexpr auto neg2Mask = genMask(getMaskEnd(neg1Mask), 1); - - const std::uint32_t *inst; - const std::uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); - const std::uint32_t abs = fetchMaskedValue(inst[0], absMask); - const std::uint32_t clmp = fetchMaskedValue(inst[0], clmpMask); - const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - const std::uint32_t src0 = fetchMaskedValue(inst[1], src0Mask); - const std::uint32_t src1 = fetchMaskedValue(inst[1], src1Mask); - const std::uint32_t src2 = fetchMaskedValue(inst[1], src2Mask); - const std::uint32_t omod = fetchMaskedValue(inst[1], omodMask); - const std::uint32_t neg = fetchMaskedValue(inst[1], negMask); - - Vop3(const std::uint32_t *inst) : inst(inst) {} - - int size() const { - return kMinInstSize + getScalarInstSize(src0) + getScalarInstSize(src1) + - getScalarInstSize(src2); - } - - void dump() const; -}; - -struct Vopc { - enum class Op { - V_CMP_F_F32, - V_CMP_LT_F32, - V_CMP_EQ_F32, - V_CMP_LE_F32, - V_CMP_GT_F32, - V_CMP_LG_F32, - V_CMP_GE_F32, - V_CMP_O_F32, - V_CMP_U_F32, - V_CMP_NGE_F32, - V_CMP_NLG_F32, - V_CMP_NGT_F32, - V_CMP_NLE_F32, - V_CMP_NEQ_F32, - V_CMP_NLT_F32, - V_CMP_TRU_F32, - V_CMPX_F_F32, - V_CMPX_LT_F32, - V_CMPX_EQ_F32, - V_CMPX_LE_F32, - V_CMPX_GT_F32, - V_CMPX_LG_F32, - V_CMPX_GE_F32, - V_CMPX_O_F32, - V_CMPX_U_F32, - V_CMPX_NGE_F32, - V_CMPX_NLG_F32, - V_CMPX_NGT_F32, - V_CMPX_NLE_F32, - V_CMPX_NEQ_F32, - V_CMPX_NLT_F32, - V_CMPX_TRU_F32, - V_CMP_F_F64, - V_CMP_LT_F64, - V_CMP_EQ_F64, - V_CMP_LE_F64, - V_CMP_GT_F64, - V_CMP_LG_F64, - V_CMP_GE_F64, - V_CMP_O_F64, - V_CMP_U_F64, - V_CMP_NGE_F64, - V_CMP_NLG_F64, - V_CMP_NGT_F64, - V_CMP_NLE_F64, - V_CMP_NEQ_F64, - V_CMP_NLT_F64, - V_CMP_TRU_F64, - V_CMPX_F_F64, - V_CMPX_LT_F64, - V_CMPX_EQ_F64, - V_CMPX_LE_F64, - V_CMPX_GT_F64, - V_CMPX_LG_F64, - V_CMPX_GE_F64, - V_CMPX_O_F64, - V_CMPX_U_F64, - V_CMPX_NGE_F64, - V_CMPX_NLG_F64, - V_CMPX_NGT_F64, - V_CMPX_NLE_F64, - V_CMPX_NEQ_F64, - V_CMPX_NLT_F64, - V_CMPX_TRU_F64, - V_CMPS_F_F32, - V_CMPS_LT_F32, - V_CMPS_EQ_F32, - V_CMPS_LE_F32, - V_CMPS_GT_F32, - V_CMPS_LG_F32, - V_CMPS_GE_F32, - V_CMPS_O_F32, - V_CMPS_U_F32, - V_CMPS_NGE_F32, - V_CMPS_NLG_F32, - V_CMPS_NGT_F32, - V_CMPS_NLE_F32, - V_CMPS_NEQ_F32, - V_CMPS_NLT_F32, - V_CMPS_TRU_F32, - V_CMPSX_F_F32, - V_CMPSX_LT_F32, - V_CMPSX_EQ_F32, - V_CMPSX_LE_F32, - V_CMPSX_GT_F32, - V_CMPSX_LG_F32, - V_CMPSX_GE_F32, - V_CMPSX_O_F32, - V_CMPSX_U_F32, - V_CMPSX_NGE_F32, - V_CMPSX_NLG_F32, - V_CMPSX_NGT_F32, - V_CMPSX_NLE_F32, - V_CMPSX_NEQ_F32, - V_CMPSX_NLT_F32, - V_CMPSX_TRU_F32, - V_CMPS_F_F64, - V_CMPS_LT_F64, - V_CMPS_EQ_F64, - V_CMPS_LE_F64, - V_CMPS_GT_F64, - V_CMPS_LG_F64, - V_CMPS_GE_F64, - V_CMPS_O_F64, - V_CMPS_U_F64, - V_CMPS_NGE_F64, - V_CMPS_NLG_F64, - V_CMPS_NGT_F64, - V_CMPS_NLE_F64, - V_CMPS_NEQ_F64, - V_CMPS_NLT_F64, - V_CMPS_TRU_F64, - V_CMPSX_F_F64, - V_CMPSX_LT_F64, - V_CMPSX_EQ_F64, - V_CMPSX_LE_F64, - V_CMPSX_GT_F64, - V_CMPSX_LG_F64, - V_CMPSX_GE_F64, - V_CMPSX_O_F64, - V_CMPSX_U_F64, - V_CMPSX_NGE_F64, - V_CMPSX_NLG_F64, - V_CMPSX_NGT_F64, - V_CMPSX_NLE_F64, - V_CMPSX_NEQ_F64, - V_CMPSX_NLT_F64, - V_CMPSX_TRU_F64, - V_CMP_F_I32, - V_CMP_LT_I32, - V_CMP_EQ_I32, - V_CMP_LE_I32, - V_CMP_GT_I32, - V_CMP_NE_I32, - V_CMP_GE_I32, - V_CMP_T_I32, - V_CMP_CLASS_F32, - V_CMP_LT_I16, - V_CMP_EQ_I16, - V_CMP_LE_I16, - V_CMP_GT_I16, - V_CMP_NE_I16, - V_CMP_GE_I16, - V_CMP_CLASS_F16, - V_CMPX_F_I32, - V_CMPX_LT_I32, - V_CMPX_EQ_I32, - V_CMPX_LE_I32, - V_CMPX_GT_I32, - V_CMPX_NE_I32, - V_CMPX_GE_I32, - V_CMPX_T_I32, - V_CMPX_CLASS_F32, - V_CMPX_LT_I16, - V_CMPX_EQ_I16, - V_CMPX_LE_I16, - V_CMPX_GT_I16, - V_CMPX_NE_I16, - V_CMPX_GE_I16, - V_CMPX_CLASS_F16, - V_CMP_F_I64, - V_CMP_LT_I64, - V_CMP_EQ_I64, - V_CMP_LE_I64, - V_CMP_GT_I64, - V_CMP_NE_I64, - V_CMP_GE_I64, - V_CMP_T_I64, - V_CMP_CLASS_F64, - V_CMP_LT_U16, - V_CMP_EQ_U16, - V_CMP_LE_U16, - V_CMP_GT_U16, - V_CMP_NE_U16, - V_CMP_GE_U16, - V_CMPX_F_I64 = 176, - V_CMPX_LT_I64, - V_CMPX_EQ_I64, - V_CMPX_LE_I64, - V_CMPX_GT_I64, - V_CMPX_NE_I64, - V_CMPX_GE_I64, - V_CMPX_T_I64, - V_CMPX_CLASS_F64, - V_CMPX_LT_U16, - V_CMPX_EQ_U16, - V_CMPX_LE_U16, - V_CMPX_GT_U16, - V_CMPX_NE_U16, - V_CMPX_GE_U16, - V_CMP_F_U32 = 192, - V_CMP_LT_U32, - V_CMP_EQ_U32, - V_CMP_LE_U32, - V_CMP_GT_U32, - V_CMP_NE_U32, - V_CMP_GE_U32, - V_CMP_T_U32, - V_CMP_F_F16, - V_CMP_LT_F16, - V_CMP_EQ_F16, - V_CMP_LE_F16, - V_CMP_GT_F16, - V_CMP_LG_F16, - V_CMP_GE_F16, - V_CMP_O_F16, - V_CMPX_F_U32, - V_CMPX_LT_U32, - V_CMPX_EQ_U32, - V_CMPX_LE_U32, - V_CMPX_GT_U32, - V_CMPX_NE_U32, - V_CMPX_GE_U32, - V_CMPX_T_U32, - V_CMPX_F_F16, - V_CMPX_LT_F16, - V_CMPX_EQ_F16, - V_CMPX_LE_F16, - V_CMPX_GT_F16, - V_CMPX_LG_F16, - V_CMPX_GE_F16, - V_CMPX_O_F16, - V_CMP_F_U64, - V_CMP_LT_U64, - V_CMP_EQ_U64, - V_CMP_LE_U64, - V_CMP_GT_U64, - V_CMP_NE_U64, - V_CMP_GE_U64, - V_CMP_T_U64, - V_CMP_U_F16, - V_CMP_NGE_F16, - V_CMP_NLG_F16, - V_CMP_NGT_F16, - V_CMP_NLE_F16, - V_CMP_NEQ_F16, - V_CMP_NLT_F16, - V_CMP_TRU_F16, - V_CMPX_F_U64, - V_CMPX_LT_U64, - V_CMPX_EQ_U64, - V_CMPX_LE_U64, - V_CMPX_GT_U64, - V_CMPX_NE_U64, - V_CMPX_GE_U64, - V_CMPX_T_U64, - V_CMPX_U_F16, - V_CMPX_NGE_F16, - V_CMPX_NLG_F16, - V_CMPX_NGT_F16, - V_CMPX_NLE_F16, - V_CMPX_NEQ_F16, - V_CMPX_NLT_F16, - V_CMPX_TRU_F16, - }; - - static constexpr int kMinInstSize = 1; - - static constexpr auto src0Mask = genMask(0, 9); - static constexpr auto vsrc1Mask = genMask(getMaskEnd(src0Mask), 8); - static constexpr auto opMask = genMask(getMaskEnd(vsrc1Mask), 8); - - const std::uint32_t *inst; - const std::uint16_t src0 = fetchMaskedValue(inst[0], src0Mask); - const std::uint8_t vsrc1 = fetchMaskedValue(inst[0], vsrc1Mask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Vopc(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Smrd { - enum class Op { - S_LOAD_DWORD, - S_LOAD_DWORDX2, - S_LOAD_DWORDX4, - S_LOAD_DWORDX8, - S_LOAD_DWORDX16, - S_BUFFER_LOAD_DWORD = 8, - S_BUFFER_LOAD_DWORDX2, - S_BUFFER_LOAD_DWORDX4, - S_BUFFER_LOAD_DWORDX8, - S_BUFFER_LOAD_DWORDX16, - S_DCACHE_INV_VOL = 29, - S_MEMTIME, - S_DCACHE_INV, - }; - - static constexpr int kMinInstSize = 1; - static constexpr auto offsetMask = genMask(0, 8); - static constexpr auto immMask = genMask(getMaskEnd(offsetMask), 1); - static constexpr auto sbaseMask = genMask(getMaskEnd(immMask), 6); - static constexpr auto sdstMask = genMask(getMaskEnd(sbaseMask), 7); - static constexpr auto opMask = genMask(getMaskEnd(sdstMask), 5); - - const std::uint32_t *inst; - const std::uint32_t offset = fetchMaskedValue(inst[0], offsetMask); - const std::uint32_t imm = fetchMaskedValue(inst[0], immMask); - const std::uint32_t sbase = fetchMaskedValue(inst[0], sbaseMask); - const std::uint32_t sdst = fetchMaskedValue(inst[0], sdstMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Smrd(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - void dump() const; -}; - -struct Mubuf { - enum class Op { - BUFFER_LOAD_FORMAT_X, - BUFFER_LOAD_FORMAT_XY, - BUFFER_LOAD_FORMAT_XYZ, - BUFFER_LOAD_FORMAT_XYZW, - BUFFER_STORE_FORMAT_X, - BUFFER_STORE_FORMAT_XY, - BUFFER_STORE_FORMAT_XYZ, - BUFFER_STORE_FORMAT_XYZW, - BUFFER_LOAD_UBYTE, - BUFFER_LOAD_SBYTE, - BUFFER_LOAD_USHORT, - BUFFER_LOAD_SSHORT, - BUFFER_LOAD_DWORD, - BUFFER_LOAD_DWORDX2, - BUFFER_LOAD_DWORDX4, - BUFFER_LOAD_DWORDX3, - BUFFER_STORE_BYTE = 24, - BUFFER_STORE_SHORT = 26, - BUFFER_STORE_DWORD = 28, - BUFFER_STORE_DWORDX2, - BUFFER_STORE_DWORDX4, - BUFFER_STORE_DWORDX3, - BUFFER_ATOMIC_SWAP = 48, - BUFFER_ATOMIC_CMPSWAP, - BUFFER_ATOMIC_ADD, - BUFFER_ATOMIC_SUB, - BUFFER_ATOMIC_RSUB, - BUFFER_ATOMIC_SMIN, - BUFFER_ATOMIC_UMIN, - BUFFER_ATOMIC_SMAX, - BUFFER_ATOMIC_UMAX, - BUFFER_ATOMIC_AND, - BUFFER_ATOMIC_OR, - BUFFER_ATOMIC_XOR, - BUFFER_ATOMIC_INC, - BUFFER_ATOMIC_DEC, - BUFFER_ATOMIC_FCMPSWAP, - BUFFER_ATOMIC_FMIN, - BUFFER_ATOMIC_FMAX, - BUFFER_ATOMIC_SWAP_X2 = 80, - BUFFER_ATOMIC_CMPSWAP_X2, - BUFFER_ATOMIC_ADD_X2, - BUFFER_ATOMIC_SUB_X2, - BUFFER_ATOMIC_RSUB_X2, - BUFFER_ATOMIC_SMIN_X2, - BUFFER_ATOMIC_UMIN_X2, - BUFFER_ATOMIC_SMAX_X2, - BUFFER_ATOMIC_UMAX_X2, - BUFFER_ATOMIC_AND_X2, - BUFFER_ATOMIC_OR_X2, - BUFFER_ATOMIC_XOR_X2, - BUFFER_ATOMIC_INC_X2, - BUFFER_ATOMIC_DEC_X2, - BUFFER_ATOMIC_FCMPSWAP_X2, - BUFFER_ATOMIC_FMIN_X2, - BUFFER_ATOMIC_FMAX_X2, - BUFFER_WBINVL1_SC_VOL = 112, - BUFFER_WBINVL1, - }; - - static constexpr int kMinInstSize = 2; - static constexpr auto offsetMask = genMask(0, 12); - static constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); - static constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); - static constexpr auto glcMask = genMask(getMaskEnd(idxenMask), 1); - static constexpr auto ldsMask = genMask(getMaskEnd(glcMask) + 1, 1); - static constexpr auto opMask = genMask(getMaskEnd(ldsMask) + 1, 7); - - static constexpr auto vaddrMask = genMask(0, 8); - static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); - static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); - static constexpr auto slcMask = genMask(getMaskEnd(srsrcMask) + 1, 1); - static constexpr auto tfeMask = genMask(getMaskEnd(slcMask), 1); - static constexpr auto soffsetMask = genMask(getMaskEnd(tfeMask), 8); - - const std::uint32_t *inst; - std::uint16_t offset = fetchMaskedValue(inst[0], offsetMask); - bool offen = fetchMaskedValue(inst[0], offenMask); - bool idxen = fetchMaskedValue(inst[0], idxenMask); - bool glc = fetchMaskedValue(inst[0], glcMask); - bool lds = fetchMaskedValue(inst[0], ldsMask); - Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); - std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); - std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); - bool slc = fetchMaskedValue(inst[1], slcMask); - bool tfe = fetchMaskedValue(inst[1], tfeMask); - std::uint8_t soffset = fetchMaskedValue(inst[1], soffsetMask); - - Mubuf(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Mtbuf { - enum class Op { - TBUFFER_LOAD_FORMAT_X, - TBUFFER_LOAD_FORMAT_XY, - TBUFFER_LOAD_FORMAT_XYZ, - TBUFFER_LOAD_FORMAT_XYZW, - TBUFFER_STORE_FORMAT_X, - TBUFFER_STORE_FORMAT_XY, - TBUFFER_STORE_FORMAT_XYZ, - TBUFFER_STORE_FORMAT_XYZW, - }; - static constexpr int kMinInstSize = 2; - - static constexpr auto offsetMask = genMask(0, 12); - static constexpr auto offenMask = genMask(getMaskEnd(offsetMask), 1); - static constexpr auto idxenMask = genMask(getMaskEnd(offenMask), 1); - static constexpr auto glcMask = genMask(getMaskEnd(idxenMask), 1); - static constexpr auto opMask = genMask(getMaskEnd(glcMask) + 1, 3); - static constexpr auto dfmtMask = genMask(getMaskEnd(opMask), 4); - static constexpr auto nfmtMask = genMask(getMaskEnd(dfmtMask), 4); - - static constexpr auto vaddrMask = genMask(0, 8); - static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); - static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); - static constexpr auto slcMask = genMask(getMaskEnd(srsrcMask) + 1, 1); - static constexpr auto tfeMask = genMask(getMaskEnd(slcMask), 1); - static constexpr auto soffsetMask = genMask(getMaskEnd(tfeMask), 8); - - const std::uint32_t *inst; - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - std::uint16_t offset = fetchMaskedValue(inst[0], offsetMask); - bool offen = fetchMaskedValue(inst[0], offenMask); - bool idxen = fetchMaskedValue(inst[0], idxenMask); - bool glc = fetchMaskedValue(inst[0], glcMask); - SurfaceFormat dfmt = (SurfaceFormat)fetchMaskedValue(inst[0], dfmtMask); - TextureChannelType nfmt = - (TextureChannelType)fetchMaskedValue(inst[0], nfmtMask); - - std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); - std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); - std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); - bool slc = fetchMaskedValue(inst[1], slcMask); - bool tfe = fetchMaskedValue(inst[1], tfeMask); - std::uint8_t soffset = fetchMaskedValue(inst[1], soffsetMask); - - Mtbuf(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Mimg { - enum class Op { - IMAGE_LOAD, - IMAGE_LOAD_MIP, - IMAGE_LOAD_PCK, - IMAGE_LOAD_PCK_SGN, - IMAGE_LOAD_MIP_PCK, - IMAGE_LOAD_MIP_PCK_SGN, - IMAGE_STORE = 8, - IMAGE_STORE_MIP, - IMAGE_STORE_PCK, - IMAGE_STORE_MIP_PCK, - IMAGE_GET_RESINFO = 14, - IMAGE_ATOMIC_SWAP, - IMAGE_ATOMIC_CMPSWAP, - IMAGE_ATOMIC_ADD, - IMAGE_ATOMIC_SUB, - IMAGE_ATOMIC_RSUB, - IMAGE_ATOMIC_SMIN, - IMAGE_ATOMIC_UMIN, - IMAGE_ATOMIC_SMAX, - IMAGE_ATOMIC_UMAX, - IMAGE_ATOMIC_AND, - IMAGE_ATOMIC_OR, - IMAGE_ATOMIC_XOR, - IMAGE_ATOMIC_INC, - IMAGE_ATOMIC_DEC, - IMAGE_ATOMIC_FCMPSWAP, - IMAGE_ATOMIC_FMIN, - IMAGE_ATOMIC_FMAX, - IMAGE_SAMPLE, - IMAGE_SAMPLE_CL, - IMAGE_SAMPLE_D, - IMAGE_SAMPLE_D_CL, - IMAGE_SAMPLE_L, - IMAGE_SAMPLE_B, - IMAGE_SAMPLE_B_CL, - IMAGE_SAMPLE_LZ, - IMAGE_SAMPLE_C, - IMAGE_SAMPLE_C_CL, - IMAGE_SAMPLE_C_D, - IMAGE_SAMPLE_C_D_CL, - IMAGE_SAMPLE_C_L, - IMAGE_SAMPLE_C_B, - IMAGE_SAMPLE_C_B_CL, - IMAGE_SAMPLE_C_LZ, - IMAGE_SAMPLE_O, - IMAGE_SAMPLE_CL_O, - IMAGE_SAMPLE_D_O, - IMAGE_SAMPLE_D_CL_O, - IMAGE_SAMPLE_L_O, - IMAGE_SAMPLE_B_O, - IMAGE_SAMPLE_B_CL_O, - IMAGE_SAMPLE_LZ_O, - IMAGE_SAMPLE_C_O, - IMAGE_SAMPLE_C_CL_O, - IMAGE_SAMPLE_C_D_O, - IMAGE_SAMPLE_C_D_CL_O, - IMAGE_SAMPLE_C_L_O, - IMAGE_SAMPLE_C_B_O, - IMAGE_SAMPLE_C_B_CL_O, - IMAGE_SAMPLE_C_LZ_O, - IMAGE_GATHER4, - IMAGE_GATHER4_CL, - IMAGE_GATHER4_L = 68, - IMAGE_GATHER4_B, - IMAGE_GATHER4_B_CL, - IMAGE_GATHER4_LZ, - IMAGE_GATHER4_C, - IMAGE_GATHER4_C_CL, - IMAGE_GATHER4_C_L = 76, - IMAGE_GATHER4_C_B, - IMAGE_GATHER4_C_B_CL, - IMAGE_GATHER4_C_LZ, - IMAGE_GATHER4_O, - IMAGE_GATHER4_CL_O, - IMAGE_GATHER4_L_O = 84, - IMAGE_GATHER4_B_O, - IMAGE_GATHER4_B_CL_O, - IMAGE_GATHER4_LZ_O, - IMAGE_GATHER4_C_O, - IMAGE_GATHER4_C_CL_O, - IMAGE_GATHER4_C_L_O = 92, - IMAGE_GATHER4_C_B_O, - IMAGE_GATHER4_C_B_CL_O, - IMAGE_GATHER4_C_LZ_O, - IMAGE_GET_LOD, - IMAGE_SAMPLE_CD = 104, - IMAGE_SAMPLE_CD_CL, - IMAGE_SAMPLE_C_CD, - IMAGE_SAMPLE_C_CD_CL, - IMAGE_SAMPLE_CD_O, - IMAGE_SAMPLE_CD_CL_O, - IMAGE_SAMPLE_C_CD_O, - IMAGE_SAMPLE_C_CD_CL_O, - }; - - static constexpr int kMinInstSize = 2; - - static constexpr auto dmaskMask = genMask(8, 4); - static constexpr auto unrmMask = genMask(getMaskEnd(dmaskMask), 1); - static constexpr auto glcMask = genMask(getMaskEnd(unrmMask), 1); - static constexpr auto daMask = genMask(getMaskEnd(glcMask), 1); - static constexpr auto r128Mask = genMask(getMaskEnd(daMask), 1); - static constexpr auto tfeMask = genMask(getMaskEnd(r128Mask), 1); - static constexpr auto lweMask = genMask(getMaskEnd(tfeMask), 1); - static constexpr auto opMask = genMask(getMaskEnd(lweMask), 7); - static constexpr auto slcMask = genMask(getMaskEnd(opMask), 1); - - static constexpr auto vaddrMask = genMask(0, 8); - static constexpr auto vdataMask = genMask(getMaskEnd(vaddrMask), 8); - static constexpr auto srsrcMask = genMask(getMaskEnd(vdataMask), 5); - static constexpr auto ssampMask = genMask(getMaskEnd(srsrcMask), 5); - - const std::uint32_t *inst; - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - std::uint8_t dmask = fetchMaskedValue(inst[0], dmaskMask); - bool unrm = fetchMaskedValue(inst[0], unrmMask); - bool glc = fetchMaskedValue(inst[0], glcMask); - bool da = fetchMaskedValue(inst[0], daMask); - bool r128 = fetchMaskedValue(inst[0], r128Mask); - bool tfe = fetchMaskedValue(inst[0], tfeMask); - bool lwe = fetchMaskedValue(inst[0], lweMask); - bool slc = fetchMaskedValue(inst[0], slcMask); - - std::uint8_t vaddr = fetchMaskedValue(inst[1], vaddrMask); - std::uint8_t vdata = fetchMaskedValue(inst[1], vdataMask); - std::uint8_t srsrc = fetchMaskedValue(inst[1], srsrcMask); - std::uint8_t ssamp = fetchMaskedValue(inst[1], ssampMask); - - Mimg(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Ds { - enum class Op { - DS_ADD_U32, - DS_SUB_U32, - DS_RSUB_U32, - DS_INC_U32, - DS_DEC_U32, - DS_MIN_I32, - DS_MAX_I32, - DS_MIN_U32, - DS_MAX_U32, - DS_AND_B32, - DS_OR_B32, - DS_XOR_B32, - DS_MSKOR_B32, - DS_WRITE_B32, - DS_WRITE2_B32, - DS_WRITE2ST64_B32, - DS_CMPST_B32, - DS_CMPST_F32, - DS_MIN_F32, - DS_MAX_F32, - DS_NOP, - DS_GWS_SEMA_RELEASE_ALL = 24, - DS_GWS_INIT, - DS_GWS_SEMA_V, - DS_GWS_SEMA_BR, - DS_GWS_SEMA_P, - DS_GWS_BARRIER, - DS_WRITE_B8, - DS_WRITE_B16, - DS_ADD_RTN_U32, - DS_SUB_RTN_U32, - DS_RSUB_RTN_U32, - DS_INC_RTN_U32, - DS_DEC_RTN_U32, - DS_MIN_RTN_I32, - DS_MAX_RTN_I32, - DS_MIN_RTN_U32, - DS_MAX_RTN_U32, - DS_AND_RTN_B32, - DS_OR_RTN_B32, - DS_XOR_RTN_B32, - DS_MSKOR_RTN_B32, - DS_WRXCHG_RTN_B32, - DS_WRXCHG2_RTN_B32, - DS_WRXCHG2ST64_RTN_B32, - DS_CMPST_RTN_B32, - DS_CMPST_RTN_F32, - DS_MIN_RTN_F32, - DS_MAX_RTN_F32, - DS_WRAP_RTN_B32, - DS_SWIZZLE_B32, - DS_READ_B32, - DS_READ2_B32, - DS_READ2ST64_B32, - DS_READ_I8, - DS_READ_U8, - DS_READ_I16, - DS_READ_U16, - DS_CONSUME, - DS_APPEND, - DS_ORDERED_COUNT, - DS_ADD_U64, - DS_SUB_U64, - DS_RSUB_U64, - DS_INC_U64, - DS_DEC_U64, - DS_MIN_I64, - DS_MAX_I64, - DS_MIN_U64, - DS_MAX_U64, - DS_AND_B64, - DS_OR_B64, - DS_XOR_B64, - DS_MSKOR_B64, - DS_WRITE_B64, - DS_WRITE2_B64, - DS_WRITE2ST64_B64, - DS_CMPST_B64, - DS_CMPST_F64, - DS_MIN_F64, - DS_MAX_F64, - DS_ADD_RTN_U64 = 96, - DS_SUB_RTN_U64, - DS_RSUB_RTN_U64, - DS_INC_RTN_U64, - DS_DEC_RTN_U64, - DS_MIN_RTN_I64, - DS_MAX_RTN_I64, - DS_MIN_RTN_U64, - DS_MAX_RTN_U64, - DS_AND_RTN_B64, - DS_OR_RTN_B64, - DS_XOR_RTN_B64, - DS_MSKOR_RTN_B64, - DS_WRXCHG_RTN_B64, - DS_WRXCHG2_RTN_B64, - DS_WRXCHG2ST64_RTN_B64, - DS_CMPST_RTN_B64, - DS_CMPST_RTN_F64, - DS_MIN_RTN_F64, - DS_MAX_RTN_F64, - DS_READ_B64 = 118, - DS_READ2_B64, - DS_READ2ST64_B64, - DS_CONDXCHG32_RTN_B64 = 126, - DS_ADD_SRC2_U32 = 128, - DS_SUB_SRC2_U32, - DS_RSUB_SRC2_U32, - DS_INC_SRC2_U32, - DS_DEC_SRC2_U32, - DS_MIN_SRC2_I32, - DS_MAX_SRC2_I32, - DS_MIN_SRC2_U32, - DS_MAX_SRC2_U32, - DS_AND_SRC2_B32, - DS_OR_SRC2_B32, - DS_XOR_SRC2_B32, - DS_WRITE_SRC2_B32, - DS_MIN_SRC2_F32 = 146, - DS_MAX_SRC2_F32, - DS_ADD_SRC2_U64 = 192, - DS_SUB_SRC2_U64, - DS_RSUB_SRC2_U64, - DS_INC_SRC2_U64, - DS_DEC_SRC2_U64, - DS_MIN_SRC2_I64, - DS_MAX_SRC2_I64, - DS_MIN_SRC2_U64, - DS_MAX_SRC2_U64, - DS_AND_SRC2_B64, - DS_OR_SRC2_B64, - DS_XOR_SRC2_B64, - DS_WRITE_SRC2_B64, - DS_MIN_SRC2_F64 = 210, - DS_MAX_SRC2_F64, - DS_WRITE_B96 = 222, - DS_WRITE_B128, - DS_CONDXCHG32_RTN_B128 = 253, - DS_READ_B96, - DS_READ_B128, - }; - - static constexpr int kMinInstSize = 2; - static constexpr auto offset0Mask = genMask(0, 8); - static constexpr auto offset1Mask = genMask(getMaskEnd(offset0Mask), 8); - static constexpr auto gdsMask = genMask(getMaskEnd(offset1Mask) + 1, 1); - static constexpr auto opMask = genMask(getMaskEnd(gdsMask), 8); - - static constexpr auto addrMask = genMask(0, 8); - static constexpr auto data0Mask = genMask(getMaskEnd(addrMask), 8); - static constexpr auto data1Mask = genMask(getMaskEnd(data0Mask), 8); - static constexpr auto vdstMask = genMask(getMaskEnd(data1Mask), 8); - - const std::uint32_t *inst; - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - - Ds(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Vintrp { - enum class Op { V_INTERP_P1_F32, V_INTERP_P2_F32, V_INTERP_MOV_F32 }; - - static constexpr int kMinInstSize = 1; - static constexpr auto vsrcMask = genMask(0, 8); - static constexpr auto attrChanMask = genMask(getMaskEnd(vsrcMask), 2); - static constexpr auto attrMask = genMask(getMaskEnd(attrChanMask), 6); - static constexpr auto opMask = genMask(getMaskEnd(attrMask), 2); - static constexpr auto vdstMask = genMask(getMaskEnd(opMask), 8); - - const std::uint32_t *inst; - uint32_t vsrc = fetchMaskedValue(inst[0], vsrcMask); - uint32_t attrChan = fetchMaskedValue(inst[0], attrChanMask); - uint32_t attr = fetchMaskedValue(inst[0], attrMask); - const Op op = static_cast(fetchMaskedValue(inst[0], opMask)); - uint32_t vdst = fetchMaskedValue(inst[0], vdstMask); - - Vintrp(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -struct Exp { - static constexpr int kMinInstSize = 2; - - static constexpr auto enMask = genMask(0, 4); - static constexpr auto targetMask = genMask(getMaskEnd(enMask), 6); - static constexpr auto comprMask = genMask(getMaskEnd(targetMask), 1); - static constexpr auto doneMask = genMask(getMaskEnd(comprMask), 1); - static constexpr auto vmMask = genMask(getMaskEnd(doneMask), 1); - - static constexpr auto vsrc0Mask = genMask(0, 8); - static constexpr auto vsrc1Mask = genMask(getMaskEnd(vsrc0Mask), 8); - static constexpr auto vsrc2Mask = genMask(getMaskEnd(vsrc1Mask), 8); - static constexpr auto vsrc3Mask = genMask(getMaskEnd(vsrc2Mask), 8); - - const std::uint32_t *inst; - - std::uint8_t en = fetchMaskedValue(inst[0], enMask); - std::uint8_t target = fetchMaskedValue(inst[0], targetMask); - bool compr = fetchMaskedValue(inst[0], comprMask); - bool done = fetchMaskedValue(inst[0], doneMask); - bool vm = fetchMaskedValue(inst[0], vmMask); - std::uint8_t vsrc0 = fetchMaskedValue(inst[1], vsrc0Mask); - std::uint8_t vsrc1 = fetchMaskedValue(inst[1], vsrc1Mask); - std::uint8_t vsrc2 = fetchMaskedValue(inst[1], vsrc2Mask); - std::uint8_t vsrc3 = fetchMaskedValue(inst[1], vsrc3Mask); - - Exp(const std::uint32_t *inst) : inst(inst) {} - - int size() const { return kMinInstSize; } - - void dump() const; -}; - -enum class InstructionClass : std::uint8_t { - Invalid, - Vop2, - Sop2, - Sopk, - Smrd, - Vop3, - Mubuf, - Mtbuf, - Mimg, - Ds, - Vintrp, - Exp, - Vop1, - Vopc, - Sop1, - Sopc, - Sopp, -}; - -static constexpr std::uint32_t kInstMask1 = - static_cast(~0u << (32 - 1)); -static constexpr std::uint32_t kInstMask2 = - static_cast(~0u << (32 - 2)); -static constexpr std::uint32_t kInstMask4 = - static_cast(~0u << (32 - 4)); -static constexpr std::uint32_t kInstMask5 = - static_cast(~0u << (32 - 5)); -static constexpr std::uint32_t kInstMask6 = - static_cast(~0u << (32 - 6)); -static constexpr std::uint32_t kInstMask7 = - static_cast(~0u << (32 - 7)); -static constexpr std::uint32_t kInstMask9 = - static_cast(~0u << (32 - 9)); - -static constexpr std::uint32_t kInstMaskValVop2 = 0b0u << (32 - 1); -static constexpr std::uint32_t kInstMaskValSop2 = 0b10u << (32 - 2); -static constexpr std::uint32_t kInstMaskValSopk = 0b1011u << (32 - 4); -static constexpr std::uint32_t kInstMaskValSmrd = 0b11000u << (32 - 5); -static constexpr std::uint32_t kInstMaskValVop3 = 0b110100u << (32 - 6); -static constexpr std::uint32_t kInstMaskValMubuf = 0b111000u << (32 - 6); -static constexpr std::uint32_t kInstMaskValMtbuf = 0b111010u << (32 - 6); -static constexpr std::uint32_t kInstMaskValMimg = 0b111100u << (32 - 6); -static constexpr std::uint32_t kInstMaskValDs = 0b110110u << (32 - 6); -static constexpr std::uint32_t kInstMaskValVintrp = 0b110010u << (32 - 6); -static constexpr std::uint32_t kInstMaskValExp = 0b111110u << (32 - 6); -static constexpr std::uint32_t kInstMaskValVop1 = 0b0111111u << (32 - 7); -static constexpr std::uint32_t kInstMaskValVopC = 0b0111110u << (32 - 7); -static constexpr std::uint32_t kInstMaskValSop1 = 0b101111101u << (32 - 9); -static constexpr std::uint32_t kInstMaskValSopc = 0b101111110u << (32 - 9); -static constexpr std::uint32_t kInstMaskValSopp = 0b101111111u << (32 - 9); - -inline InstructionClass getInstructionClass(std::uint32_t instr) { - switch (instr & kInstMask9) { - case kInstMaskValSop1: - return InstructionClass::Sop1; - case kInstMaskValSopc: - return InstructionClass::Sopc; - case kInstMaskValSopp: - return InstructionClass::Sopp; - } - - switch (instr & kInstMask7) { - case kInstMaskValVop1: - return InstructionClass::Vop1; - case kInstMaskValVopC: - return InstructionClass::Vopc; - } - - switch (instr & kInstMask6) { - case kInstMaskValVop3: - return InstructionClass::Vop3; - case kInstMaskValMubuf: - return InstructionClass::Mubuf; - case kInstMaskValMtbuf: - return InstructionClass::Mtbuf; - case kInstMaskValMimg: - return InstructionClass::Mimg; - case kInstMaskValDs: - return InstructionClass::Ds; - case kInstMaskValVintrp: - return InstructionClass::Vintrp; - case kInstMaskValExp: - return InstructionClass::Exp; - } - - if ((instr & kInstMask5) == kInstMaskValSmrd) { - return InstructionClass::Smrd; - } - - if ((instr & kInstMask4) == kInstMaskValSopk) { - return InstructionClass::Sopk; - } - - if ((instr & kInstMask2) == kInstMaskValSop2) { - return InstructionClass::Sop2; - } - - if ((instr & kInstMask1) == kInstMaskValVop2) { - return InstructionClass::Vop2; - } - - return InstructionClass::Invalid; -} - -struct Instruction { - const std::uint32_t *inst; - InstructionClass instClass = getInstructionClass(*inst); - - Instruction(const std::uint32_t *inst) : inst(inst) {} - - int size() const { - switch (instClass) { - case InstructionClass::Vop2: - return Vop2(inst).size(); - case InstructionClass::Sop2: - return Sop2(inst).size(); - case InstructionClass::Sopk: - return Sopk(inst).size(); - case InstructionClass::Smrd: - return Smrd(inst).size(); - case InstructionClass::Vop3: - return Vop3(inst).size(); - case InstructionClass::Mubuf: - return Mubuf(inst).size(); - case InstructionClass::Mtbuf: - return Mtbuf(inst).size(); - case InstructionClass::Mimg: - return Mimg(inst).size(); - case InstructionClass::Ds: - return Ds(inst).size(); - case InstructionClass::Vintrp: - return Vintrp(inst).size(); - case InstructionClass::Exp: - return Exp(inst).size(); - case InstructionClass::Vop1: - return Vop1(inst).size(); - case InstructionClass::Vopc: - return Vopc(inst).size(); - case InstructionClass::Sop1: - return Sop1(inst).size(); - case InstructionClass::Sopc: - return Sopc(inst).size(); - case InstructionClass::Sopp: - return Sopp(inst).size(); - - case InstructionClass::Invalid: - break; - } - - return 1; - } - - void dump() const; -}; - -const char *instructionClassToString(InstructionClass instrClass); -const char *opcodeToString(InstructionClass instClass, int op); - -const char *sop1OpcodeToString(Sop1::Op op); -const char *sop2OpcodeToString(Sop2::Op op); -const char *sopkOpcodeToString(Sopk::Op op); -const char *sopcOpcodeToString(Sopc::Op op); -const char *soppOpcodeToString(Sopp::Op op); -const char *vop2OpcodeToString(Vop2::Op op); -const char *vop1OpcodeToString(Vop1::Op op); -const char *vopcOpcodeToString(Vopc::Op op); -const char *vop3OpcodeToString(Vop3::Op op); -const char *smrdOpcodeToString(Smrd::Op op); -const char *mubufOpcodeToString(Mubuf::Op op); -const char *mtbufOpcodeToString(Mtbuf::Op op); -const char *mimgOpcodeToString(Mimg::Op op); -const char *dsOpcodeToString(Ds::Op op); -const char *vintrpOpcodeToString(Vintrp::Op op); -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp b/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp deleted file mode 100644 index d4fd130..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/RegisterId.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once - -#include - -namespace amdgpu::shader { -class RegisterId { - static constexpr std::uint32_t kScalarOperandsOffset = 0; - static constexpr std::uint32_t kScalarOperandsCount = 256; - static constexpr std::uint32_t kVectorOperandsOffset = - kScalarOperandsOffset + kScalarOperandsCount; - static constexpr std::uint32_t kVectorOperandsCount = 512; - static constexpr std::uint32_t kExportOperandsOffset = - kVectorOperandsOffset + kVectorOperandsCount; - static constexpr std::uint32_t kExportOperandsCount = 64; - static constexpr std::uint32_t kAttrOperandsOffset = - kExportOperandsOffset + kExportOperandsCount; - static constexpr std::uint32_t kAttrOperandsCount = 32; - static constexpr std::uint32_t kOperandsCount = - kAttrOperandsOffset + kAttrOperandsCount; - - static constexpr std::uint32_t kRegisterVccLoId = kScalarOperandsOffset + 106; - static constexpr std::uint32_t kRegisterVccHiId = kScalarOperandsOffset + 107; - static constexpr std::uint32_t kRegisterM0Id = kScalarOperandsOffset + 124; - static constexpr std::uint32_t kRegisterExecLoId = - kScalarOperandsOffset + 126; - static constexpr std::uint32_t kRegisterExecHiId = - kScalarOperandsOffset + 127; - static constexpr std::uint32_t kRegisterSccId = kScalarOperandsOffset + 253; - static constexpr std::uint32_t kRegisterLdsDirect = - kScalarOperandsOffset + 254; - -public: - enum enum_type : std::uint32_t { - Invalid = ~static_cast(0), - - VccLo = kRegisterVccLoId, - VccHi = kRegisterVccHiId, - M0 = kRegisterM0Id, - ExecLo = kRegisterExecLoId, - ExecHi = kRegisterExecHiId, - Scc = kRegisterSccId, - LdsDirect = kRegisterLdsDirect, - } raw = Invalid; - - RegisterId(enum_type value) : raw(value) {} - - operator enum_type() const { return raw; } - - static RegisterId Raw(std::uint32_t index) { - return static_cast(index); - } - static RegisterId Scalar(std::uint32_t index) { - return static_cast(index + kScalarOperandsOffset); - } - static RegisterId Vector(std::uint32_t index) { - return static_cast(index + kVectorOperandsOffset); - } - static RegisterId Export(std::uint32_t index) { - return static_cast(index + kExportOperandsOffset); - } - static RegisterId Attr(std::uint32_t index) { - return static_cast(index + kAttrOperandsOffset); - } - - bool isScalar() const { - return raw >= kScalarOperandsOffset && - raw < kScalarOperandsOffset + kScalarOperandsCount; - } - bool isVector() const { - return raw >= kVectorOperandsOffset && - raw < kVectorOperandsOffset + kVectorOperandsCount; - } - bool isExport() const { - return raw >= kExportOperandsOffset && - raw < kExportOperandsOffset + kExportOperandsCount; - } - bool isAttr() const { - return raw >= kAttrOperandsOffset && - raw < kAttrOperandsOffset + kAttrOperandsCount; - } - - unsigned getOffset() const { - if (isScalar()) { - return raw - kScalarOperandsOffset; - } - - if (isVector()) { - return raw - kVectorOperandsOffset; - } - - if (isExport()) { - return raw - kExportOperandsOffset; - } - - if (isAttr()) { - return raw - kAttrOperandsOffset; - } - - return raw; - } -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp b/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp deleted file mode 100644 index c0f11f7..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/RegisterState.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once -#include "RegisterId.hpp" -#include "Value.hpp" -#include - -namespace amdgpu::shader { -struct RegisterState { - std::uint64_t pc; - - Value sgprs[104]; - Value vccLo; - Value vccHi; - Value m0; - Value execLo; - Value execHi; - Value scc; - Value ldsDirect; - Value vgprs[512]; - Value attrs[32]; - - Value getRegister(RegisterId regId); - void setRegister(RegisterId regId, Value value); - -private: - Value getRegisterImpl(RegisterId regId); -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp deleted file mode 100644 index 7b247ed..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Stage.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -namespace amdgpu::shader { -enum class Stage : unsigned char { None, Vertex, Fragment, Geometry, Compute }; -} diff --git a/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp b/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp deleted file mode 100644 index 84c2471..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/TypeId.hpp +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include - -namespace amdgpu::shader { -struct TypeId { - enum { - Bool, - SInt8, - UInt8, - SInt16, - UInt16, - SInt32, - UInt32, - UInt32x2, - UInt32x3, - UInt32x4, - UInt64, - SInt64, - ArrayUInt32x8, - ArrayUInt32x16, - Float16, - Float32, - Float32x2, - Float32x3, - Float32x4, - Float64, - ArrayFloat32x8, - ArrayFloat32x16, - Sampler, - Image2D, - StorageImage2D, - SampledImage2D, - - Void // should be last - } raw = Void; - - using enum_type = decltype(raw); - - TypeId() = default; - TypeId(enum_type value) : raw(value) {} - operator enum_type() const { return raw; } - - TypeId getBaseType() const; - std::size_t getSize() const; - std::size_t getElementsCount() const; - - bool isSignedInt() const { - return raw == TypeId::SInt8 || raw == TypeId::SInt16 || - raw == TypeId::SInt32 || raw == TypeId::SInt64; - } - - bool isFloatPoint() const { - return raw == TypeId::Float16 || raw == TypeId::Float32 || - raw == TypeId::Float64; - } -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp deleted file mode 100644 index ab88789..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Uniform.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "AccessOp.hpp" -#include "TypeId.hpp" -#include "spirv/spirv-builder.hpp" - -#include -#include - -namespace amdgpu::shader { -struct UniformInfo { - std::uint32_t buffer[8]; - int index; - TypeId typeId; - spirv::PointerType type; - spirv::VariableValue variable; - AccessOp accessOp = AccessOp::None; - bool isBuffer; -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/UniformBindings.hpp b/hw/amdgpu/shader/include/amdgpu/shader/UniformBindings.hpp deleted file mode 100644 index f90635b..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/UniformBindings.hpp +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include "Stage.hpp" -#include "util/unreachable.hpp" - -namespace amdgpu::shader { -struct UniformBindings { - static constexpr auto kBufferSlots = 16; - static constexpr auto kImageSlots = 16; - static constexpr auto kSamplerSlots = 16; - static constexpr auto kStorageImageSlots = 16; - - static constexpr auto kBufferOffset = 0; - static constexpr auto kImageOffset = kBufferOffset + kBufferSlots; - static constexpr auto kSamplerOffset = kImageOffset + kImageSlots; - static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots; - - static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots; - - static constexpr auto kVertexOffset = 0; - static constexpr auto kFragmentOffset = kStageSize; - - static unsigned getBufferBinding(Stage stage, unsigned index) { - if (index >= kBufferSlots) { - util::unreachable(); - } - - return index + getStageOffset(stage) + kBufferOffset; - } - - static unsigned getImageBinding(Stage stage, unsigned index) { - if (index >= kImageSlots) { - util::unreachable(); - } - - return index + getStageOffset(stage) + kImageOffset; - } - - static unsigned getStorageImageBinding(Stage stage, unsigned index) { - if (index >= kStorageImageSlots) { - util::unreachable(); - } - - return index + getStageOffset(stage) + kStorageImageOffset; - } - - static unsigned getSamplerBinding(Stage stage, unsigned index) { - if (index >= kSamplerSlots) { - util::unreachable(); - } - - return index + getStageOffset(stage) + kSamplerOffset; - } - -private: - static unsigned getStageOffset(Stage stage) { - switch (stage) { - case Stage::Fragment: - return kFragmentOffset; - - case Stage::Vertex: - return kVertexOffset; - - case Stage::Compute: - return kVertexOffset; - - default: - util::unreachable(); - } - } -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp deleted file mode 100644 index b98d93d..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/Value.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once -#include - -namespace amdgpu::shader { -struct Value { - spirv::Type type; - spirv::Value value; - - Value() = default; - Value(spirv::Type type, spirv::Value value) : type(type), value(value) {} - - explicit operator bool() const { return static_cast(value); } - bool operator==(Value other) const { return value == other.value; } -}; -} // namespace amdgpu::shader diff --git a/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp b/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp deleted file mode 100644 index e393622..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/cf.hpp +++ /dev/null @@ -1,149 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace cf { -enum class TerminatorKind { - None, - Branch, - BranchToUnknown, - Return, -}; - -class BasicBlock { - std::uint64_t address; - std::uint64_t size = 0; - - std::set predecessors; - BasicBlock *successors[2]{}; - TerminatorKind terminator = TerminatorKind::None; - -public: - explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0) - : address(address), size(size) {} - - BasicBlock(const BasicBlock &) = delete; - - void setSize(std::uint64_t newSize) { size = newSize; } - std::uint64_t getSize() const { return size; } - std::uint64_t getAddress() const { return address; } - TerminatorKind getTerminator() const { return terminator; } - - void createConditionalBranch(BasicBlock *ifTrue, BasicBlock *ifFalse); - void createBranch(BasicBlock *target); - void createBranchToUnknown(); - void createReturn(); - - void replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB); - void replacePredecessor(BasicBlock *origBB, BasicBlock *newBB) { - origBB->replaceSuccessor(this, newBB); - } - - template T> void walk(T &&cb) { - std::vector workStack; - std::set processed; - - workStack.push_back(this); - processed.insert(this); - - while (!workStack.empty()) { - auto block = workStack.back(); - workStack.pop_back(); - - block->walkSuccessors([&](BasicBlock *successor) { - if (processed.insert(successor).second) { - workStack.push_back(successor); - } - }); - - cb(*block); - } - } - - template T> void walkSuccessors(T &&cb) const { - if (successors[0]) { - cb(successors[0]); - - if (successors[1]) { - cb(successors[1]); - } - } - } - - template T> - void walkPredecessors(T &&cb) const { - for (auto pred : predecessors) { - cb(pred); - } - } - - std::size_t getPredecessorsCount() const { return predecessors.size(); } - - bool hasDirectPredecessor(const BasicBlock &block) const; - bool hasPredecessor(const BasicBlock &block) const; - - std::size_t getSuccessorsCount() const { - if (successors[0] == nullptr) { - return 0; - } - - return successors[1] != nullptr ? 2 : 1; - } - - BasicBlock *getSuccessor(std::size_t index) const { - return successors[index]; - } - - void split(BasicBlock *target); -}; - -class Context { - std::map> basicBlocks; - -public: - BasicBlock *getBasicBlockAt(std::uint64_t address) { - if (auto it = basicBlocks.find(address); it != basicBlocks.end()) { - return &it->second; - } - - return nullptr; - } - - BasicBlock *getBasicBlock(std::uint64_t address) { - if (auto it = basicBlocks.lower_bound(address); it != basicBlocks.end()) { - auto bb = &it->second; - - if (bb->getAddress() <= address && - bb->getAddress() + bb->getSize() > address) { - return bb; - } - } - - return nullptr; - } - - BasicBlock *getOrCreateBasicBlock(std::uint64_t address, bool split = true) { - auto it = basicBlocks.lower_bound(address); - - if (it != basicBlocks.end()) { - auto bb = &it->second; - - if (bb->getAddress() <= address && - bb->getAddress() + bb->getSize() > address) { - if (split && bb->getAddress() != address) { - auto result = &basicBlocks.emplace_hint(it, address, address)->second; - bb->split(result); - return result; - } - - return bb; - } - } - - return &basicBlocks.emplace_hint(it, address, address)->second; - } -}; -} // namespace cf diff --git a/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp b/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp deleted file mode 100644 index c9755bc..0000000 --- a/hw/amdgpu/shader/include/amdgpu/shader/scf.hpp +++ /dev/null @@ -1,344 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -namespace cf { -class BasicBlock; -} - -namespace scf { -class BasicBlock; -struct PrintOptions { - unsigned char identCount = 2; - char identChar = ' '; - std::function - blockPrinter; - - std::string makeIdent(unsigned depth) const { - return std::string(depth * identCount, identChar); - } -}; - -class Node { - Node *mParent = nullptr; - Node *mNext = nullptr; - Node *mPrev = nullptr; - -public: - virtual ~Node() = default; - virtual void print(const PrintOptions &options, unsigned depth) = 0; - virtual bool isEqual(const Node &other) const { return this == &other; } - - void dump() { print({}, 0); } - - void setParent(Node *parent) { mParent = parent; } - - Node *getParent() const { return mParent; } - - template - requires(std::is_base_of_v) - auto getParent() const -> decltype(dynCast(mParent)) { - return dynCast(mParent); - } - - Node *getNext() const { return mNext; } - - Node *getPrev() const { return mPrev; } - - friend class Block; -}; - -template - requires(std::is_base_of_v && std::is_base_of_v) && - requires(ST *s) { dynamic_cast(s); } -T *dynCast(ST *s) { - return dynamic_cast(s); -} - -template - requires(std::is_base_of_v && std::is_base_of_v) && - requires(const ST *s) { dynamic_cast(s); } -const T *dynCast(const ST *s) { - return dynamic_cast(s); -} - -inline bool isNodeEqual(const Node *lhs, const Node *rhs) { - if (lhs == rhs) { - return true; - } - - return lhs != nullptr && rhs != nullptr && lhs->isEqual(*rhs); -} - -struct UnknownBlock final : Node { - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%sunknown\n", options.makeIdent(depth).c_str()); - } - - bool isEqual(const Node &other) const override { - return this == &other || dynCast(&other) != nullptr; - } -}; - -struct Return final : Node { - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%sreturn\n", options.makeIdent(depth).c_str()); - } - - bool isEqual(const Node &other) const override { - return this == &other || dynCast(&other) != nullptr; - } -}; - -class Context; - -class Block final : public Node { - Node *mBegin = nullptr; - Node *mEnd = nullptr; - - void *mUserData = nullptr; - -public: - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%s{\n", options.makeIdent(depth).c_str()); - - for (auto node = mBegin; node != nullptr; node = node->getNext()) { - node->print(options, depth + 1); - } - std::printf("%s}\n", options.makeIdent(depth).c_str()); - } - - bool isEmpty() const { return mBegin == nullptr; } - - Node *getRootNode() const { return mBegin; } - Node *getLastNode() const { return mEnd; } - - void setUserData(void *data) { mUserData = data; } - void *getUserData() const { return mUserData; } - template T *getUserData() const { - return static_cast(mUserData); - } - - void eraseFrom(Node *endBefore); - void splitInto(Block *target, Node *splitPoint); - Block *split(Context &context, Node *splitPoint); - - void append(Node *node) { - assert(node->mParent == nullptr); - assert(node->mPrev == nullptr); - assert(node->mNext == nullptr); - - node->mParent = this; - node->mPrev = mEnd; - - if (mEnd != nullptr) { - mEnd->mNext = node; - } - - if (mBegin == nullptr) { - mBegin = node; - } - - mEnd = node; - } - - void detachNode(Node *node) { - if (node->mPrev != nullptr) { - node->mPrev->mNext = node->mNext; - } - - if (node->mNext != nullptr) { - node->mNext->mPrev = node->mPrev; - } - - if (mBegin == node) { - mBegin = node->mNext; - } - - if (mEnd == node) { - mEnd = node->mPrev; - } - - node->mNext = nullptr; - node->mPrev = nullptr; - node->mParent = nullptr; - } - - bool isEqual(const Node &other) const override { - if (this == &other) { - return true; - } - - auto otherBlock = dynCast(&other); - - if (otherBlock == nullptr) { - return false; - } - - auto thisIt = mBegin; - auto otherIt = otherBlock->mBegin; - - while (thisIt != nullptr && otherIt != nullptr) { - if (!thisIt->isEqual(*otherIt)) { - return false; - } - - thisIt = thisIt->mNext; - otherIt = otherIt->mNext; - } - - return thisIt == otherIt; - } -}; - -class BasicBlock final : public Node { - std::uint64_t address; - std::uint64_t size = 0; - -public: - explicit BasicBlock(std::uint64_t address, std::uint64_t size = 0) - : address(address), size(size) {} - - std::uint64_t getSize() const { return size; } - std::uint64_t getAddress() const { return address; } - - void print(const PrintOptions &options, unsigned depth) override { - std::printf( - "%sbb%lx\n", - std::string(depth * options.identCount, options.identChar).c_str(), - getAddress()); - if (depth != 0 && options.blockPrinter) { - options.blockPrinter(options, depth + 1, this); - } - } - - Block *getBlock() const { return dynCast(getParent()); } - - bool isEqual(const Node &other) const override { - if (this == &other) { - return true; - } - - if (auto otherBlock = dynCast(&other)) { - return address == otherBlock->address; - } - - return false; - } -}; - -struct IfElse final : Node { - Block *ifTrue; - Block *ifFalse; - - IfElse(Block *ifTrue, Block *ifFalse) : ifTrue(ifTrue), ifFalse(ifFalse) { - ifTrue->setParent(this); - ifFalse->setParent(this); - } - - void print(const PrintOptions &options, unsigned depth) override { - if (ifTrue->isEmpty()) { - std::printf("%sif false\n", options.makeIdent(depth).c_str()); - ifFalse->print(options, depth); - return; - } - - std::printf("%sif true\n", options.makeIdent(depth).c_str()); - ifTrue->print(options, depth); - if (!ifFalse->isEmpty()) { - std::printf("%selse\n", options.makeIdent(depth).c_str()); - ifFalse->print(options, depth); - } - } - - bool isEqual(const Node &other) const override { - if (this == &other) { - return true; - } - - if (auto otherBlock = dynCast(&other)) { - return ifTrue->isEqual(*otherBlock->ifTrue) && - ifFalse->isEqual(*otherBlock->ifFalse); - } - - return false; - } -}; - -struct Jump final : Node { - BasicBlock *target; - - Jump(BasicBlock *target) : target(target) {} - - bool isEqual(const Node &other) const override { - if (this == &other) { - return true; - } - - if (auto otherJump = dynCast(&other)) { - return target == otherJump->target; - } - - return false; - } - - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%sjump ", options.makeIdent(depth).c_str()); - target->print(options, 0); - } -}; - -struct Loop final : Node { - Block *body; - - Loop(Block *body) : body(body) { body->setParent(this); } - - bool isEqual(const Node &other) const override { - if (this == &other) { - return true; - } - - if (auto otherLoop = dynCast(&other)) { - return body->isEqual(*otherLoop->body); - } - - return false; - } - - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%sloop {\n", options.makeIdent(depth).c_str()); - body->print(options, depth + 1); - std::printf("%s}\n", options.makeIdent(depth).c_str()); - } -}; - -struct Break final : Node { - bool isEqual(const Node &other) const override { - return this == &other || dynCast(&other) != nullptr; - } - - void print(const PrintOptions &options, unsigned depth) override { - std::printf("%sbreak\n", options.makeIdent(depth).c_str()); - } -}; - -class Context { - std::forward_list> mNodes; - -public: - template - requires(std::is_constructible_v) - T *create(ArgsT &&...args) { - auto result = new T(std::forward(args)...); - mNodes.push_front(std::unique_ptr{result}); - return result; - } -}; - -scf::Block *structurize(Context &ctxt, cf::BasicBlock *bb); -void makeUniqueBasicBlocks(Context &ctxt, Block *block); -} // namespace scf diff --git a/hw/amdgpu/shader/src/CfBuilder.cpp b/hw/amdgpu/shader/src/CfBuilder.cpp deleted file mode 100644 index 8bad811..0000000 --- a/hw/amdgpu/shader/src/CfBuilder.cpp +++ /dev/null @@ -1,178 +0,0 @@ -#include "CfBuilder.hpp" -#include "Instruction.hpp" -#include -#include -#include - -using namespace amdgpu; -using namespace amdgpu::shader; - -struct CfgBuilder { - cf::Context *context; - RemoteMemory memory; - - std::size_t analyzeBb(cf::BasicBlock *bb, std::uint64_t *successors, - std::size_t *successorsCount) { - auto address = bb->getAddress(); - auto instBegin = memory.getPointer(address); - auto instHex = instBegin; - - while (true) { - auto instruction = Instruction(instHex); - auto size = instruction.size(); - auto pc = address + ((instHex - instBegin) << 2); - instHex += size; - - if (instruction.instClass == InstructionClass::Sop1) { - Sop1 sop1{instHex - size}; - - if (sop1.op == Sop1::Op::S_SETPC_B64 || - sop1.op == Sop1::Op::S_SWAPPC_B64) { - bb->createBranchToUnknown(); - break; - } - - continue; - } - - if (instruction.instClass == InstructionClass::Sopp) { - Sopp sopp{instHex - size}; - - if (sopp.op == Sopp::Op::S_ENDPGM) { - bb->createReturn(); - break; - } - - bool isEnd = false; - switch (sopp.op) { - case Sopp::Op::S_BRANCH: - successors[0] = pc + ((size + sopp.simm) << 2); - *successorsCount = 1; - - isEnd = true; - break; - - case Sopp::Op::S_CBRANCH_SCC0: - case Sopp::Op::S_CBRANCH_SCC1: - case Sopp::Op::S_CBRANCH_VCCZ: - case Sopp::Op::S_CBRANCH_VCCNZ: - case Sopp::Op::S_CBRANCH_EXECZ: - case Sopp::Op::S_CBRANCH_EXECNZ: - successors[0] = pc + ((size + sopp.simm) << 2); - successors[1] = pc + (size << 2); - *successorsCount = 2; - isEnd = true; - break; - - default: - break; - } - - if (isEnd) { - break; - } - continue; - } - - // move instruction that requires EXEC test to separate bb - if (instruction.instClass == InstructionClass::Vop2 || - instruction.instClass == InstructionClass::Vop3 || - instruction.instClass == InstructionClass::Mubuf || - instruction.instClass == InstructionClass::Mtbuf || - instruction.instClass == InstructionClass::Mimg || - instruction.instClass == InstructionClass::Ds || - instruction.instClass == InstructionClass::Vintrp || - instruction.instClass == InstructionClass::Exp || - instruction.instClass == InstructionClass::Vop1 || - instruction.instClass == InstructionClass::Vopc || - instruction.instClass == InstructionClass::Smrd) { - *successorsCount = 1; - - if (instBegin != instHex - size) { - // if it is not first instruction in block, move end to prev - // instruction, successor is current instruction - instHex -= size; - successors[0] = pc; - break; - } - - successors[0] = pc + (size << 2); - break; - } - } - - return (instHex - instBegin) << 2; - } - - cf::BasicBlock *buildCfg(std::uint64_t entryPoint) { - std::vector workList; - workList.push_back(entryPoint); - std::unordered_set processed; - processed.insert(entryPoint); - - struct BranchInfo { - std::uint64_t source; - std::size_t count; - std::uint64_t targets[2]; - }; - - std::vector branches; - - while (!workList.empty()) { - auto address = workList.back(); - workList.pop_back(); - - auto bb = context->getOrCreateBasicBlock(address); - - if (bb->getSize() != 0) { - continue; - } - - std::uint64_t successors[2]; - std::size_t successorsCount = 0; - std::size_t size = analyzeBb(bb, successors, &successorsCount); - bb->setSize(size); - - if (successorsCount == 2) { - branches.push_back( - {address + size - 4, 2, {successors[0], successors[1]}}); - - if (processed.insert(successors[0]).second) { - workList.push_back(successors[0]); - } - if (processed.insert(successors[1]).second) { - workList.push_back(successors[1]); - } - } else if (successorsCount == 1) { - branches.push_back({address + size - 4, 1, {successors[0]}}); - - if (processed.insert(successors[0]).second) { - workList.push_back(successors[0]); - } - } - } - - for (auto branch : branches) { - auto bb = context->getBasicBlock(branch.source); - assert(bb); - if (branch.count == 2) { - bb->createConditionalBranch( - context->getBasicBlockAt(branch.targets[0]), - context->getBasicBlockAt(branch.targets[1])); - } else { - bb->createBranch(context->getBasicBlockAt(branch.targets[0])); - } - } - - return context->getBasicBlockAt(entryPoint); - } -}; - -cf::BasicBlock *amdgpu::shader::buildCf(cf::Context &ctxt, RemoteMemory memory, - std::uint64_t entryPoint) { - CfgBuilder builder; - builder.context = &ctxt; - builder.memory = memory; - - return builder.buildCfg(entryPoint); -} diff --git a/hw/amdgpu/shader/src/Converter.cpp b/hw/amdgpu/shader/src/Converter.cpp deleted file mode 100644 index 8175db8..0000000 --- a/hw/amdgpu/shader/src/Converter.cpp +++ /dev/null @@ -1,499 +0,0 @@ -#include "Converter.hpp" -#include "CfBuilder.hpp" -#include "ConverterContext.hpp" -#include "Fragment.hpp" -#include "Instruction.hpp" -#include "RegisterState.hpp" -#include "UniformBindings.hpp" -#include "amdgpu/RemoteMemory.hpp" -#include "cf.hpp" -#include "scf.hpp" -#include "util/unreachable.hpp" -#include -#include -#include -#include - -static void printInstructions(const scf::PrintOptions &options, unsigned depth, - std::uint32_t *instBegin, std::size_t size) { - auto instHex = instBegin; - auto instEnd = instBegin + size / sizeof(std::uint32_t); - - while (instHex < instEnd) { - auto instruction = amdgpu::shader::Instruction(instHex); - std::printf("%s", options.makeIdent(depth).c_str()); - instruction.dump(); - std::printf("\n"); - instHex += instruction.size(); - } -} - -namespace amdgpu::shader { -class Converter { - scf::Context *scfContext; - cf::Context cfContext; - RemoteMemory memory; - Function *function = nullptr; - std::forward_list states; - std::vector freeStates; - -public: - void convertFunction(RemoteMemory mem, scf::Context *scfCtxt, - scf::Block *block, Function *fn) { - scfContext = scfCtxt; - function = fn; - memory = mem; - - auto lastFragment = convertBlock(block, &function->entryFragment, nullptr); - - if (lastFragment != nullptr) { - lastFragment->builder.createBranch(fn->exitFragment.entryBlockId); - lastFragment->appendBranch(fn->exitFragment); - } - - initState(&fn->exitFragment); - } - -private: - RegisterState *allocateState() { - if (freeStates.empty()) { - return &states.emplace_front(); - } - - auto result = freeStates.back(); - freeStates.pop_back(); - *result = {}; - return result; - } - - void releaseState(RegisterState *state) { - assert(state != nullptr); - freeStates.push_back(state); - } - - void initState(Fragment *fragment, std::uint64_t address = 0) { - if (fragment->registers == nullptr) { - fragment->registers = allocateState(); - } - - if (address != 0) { - fragment->registers->pc = address; - } - - fragment->injectValuesFromPreds(); - fragment->predecessors.clear(); - } - - void releaseStateOf(Fragment *frag) { - releaseState(frag->registers); - frag->registers = nullptr; - frag->values = {}; - frag->outputs = {}; - } - - bool needInjectExecTest(Fragment *fragment) { - auto inst = memory.getPointer(fragment->registers->pc); - auto instClass = getInstructionClass(*inst); - return instClass == InstructionClass::Vop2 || - instClass == InstructionClass::Vop3 || - instClass == InstructionClass::Mubuf || - instClass == InstructionClass::Mtbuf || - instClass == InstructionClass::Mimg || - instClass == InstructionClass::Ds || - instClass == InstructionClass::Vintrp || - instClass == InstructionClass::Exp || - instClass == InstructionClass::Vop1 || - instClass == InstructionClass::Vopc /* || - instClass == InstructionClass::Smrd*/ - ; - } - - spirv::BoolValue createExecTest(Fragment *fragment) { - auto context = fragment->context; - auto &builder = fragment->builder; - auto boolT = context->getBoolType(); - auto uint32_0 = context->getUInt32(0); - auto loIsNotZero = - builder.createINotEqual(boolT, fragment->getExecLo().value, uint32_0); - auto hiIsNotZero = - builder.createINotEqual(boolT, fragment->getExecHi().value, uint32_0); - - return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero); - } - - Fragment *convertBlock(scf::Block *block, Fragment *rootFragment, - Fragment *loopMergeFragment) { - Fragment *currentFragment = nullptr; - - for (scf::Node *node = block->getRootNode(); node != nullptr; - node = node->getNext()) { - - if (auto bb = dynCast(node)) { - if (currentFragment == nullptr) { - currentFragment = rootFragment; - } else { - auto newFragment = function->createFragment(); - currentFragment->appendBranch(*newFragment); - currentFragment->builder.createBranch(newFragment->entryBlockId); - currentFragment = newFragment; - } - - initState(currentFragment, bb->getAddress()); - for (auto pred : currentFragment->predecessors) { - releaseStateOf(pred); - } - - if (needInjectExecTest(currentFragment)) { - auto bodyFragment = function->createFragment(); - auto mergeFragment = function->createFragment(); - - auto cond = createExecTest(currentFragment); - - currentFragment->appendBranch(*bodyFragment); - currentFragment->appendBranch(*mergeFragment); - currentFragment->builder.createSelectionMerge( - mergeFragment->entryBlockId, {}); - currentFragment->builder.createBranchConditional( - cond, bodyFragment->entryBlockId, mergeFragment->entryBlockId); - - initState(bodyFragment, bb->getAddress()); - bodyFragment->convert(bb->getSize()); - - bodyFragment->appendBranch(*mergeFragment); - bodyFragment->builder.createBranch(mergeFragment->entryBlockId); - - initState(mergeFragment); - releaseState(currentFragment->registers); - releaseState(bodyFragment->registers); - - currentFragment = mergeFragment; - } else { - currentFragment->convert(bb->getSize()); - } - continue; - } - - if (auto ifElse = dynCast(node)) { - auto isBreakBlock = [](scf::Block *block) { - if (block->isEmpty()) { - return false; - } - if (block->getLastNode() != block->getRootNode()) { - return false; - } - - return dynamic_cast(block->getRootNode()) != nullptr; - }; - - if (loopMergeFragment != nullptr && ifElse->ifTrue->isEmpty() && - isBreakBlock(ifElse->ifFalse)) { - auto mergeFragment = function->createFragment(); - currentFragment->appendBranch(*mergeFragment); - currentFragment->appendBranch(*loopMergeFragment); - - currentFragment->builder.createBranchConditional( - currentFragment->branchCondition, mergeFragment->entryBlockId, - loopMergeFragment->entryBlockId); - - initState(mergeFragment); - releaseStateOf(currentFragment); - currentFragment = mergeFragment; - continue; - } - - auto ifTrueFragment = function->createFragment(); - auto ifFalseFragment = function->createFragment(); - auto mergeFragment = function->createFragment(); - - currentFragment->appendBranch(*ifTrueFragment); - currentFragment->appendBranch(*ifFalseFragment); - - auto ifTrueLastBlock = - convertBlock(ifElse->ifTrue, ifTrueFragment, loopMergeFragment); - auto ifFalseLastBlock = - convertBlock(ifElse->ifFalse, ifFalseFragment, loopMergeFragment); - - if (ifTrueLastBlock != nullptr) { - if (!ifTrueLastBlock->hasTerminator) { - ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId); - ifTrueLastBlock->appendBranch(*mergeFragment); - } - - if (ifTrueLastBlock->registers == nullptr) { - initState(ifTrueLastBlock); - } - } - - if (ifFalseLastBlock != nullptr) { - if (!ifFalseLastBlock->hasTerminator) { - ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId); - ifFalseLastBlock->appendBranch(*mergeFragment); - } - - if (ifFalseLastBlock->registers == nullptr) { - initState(ifFalseLastBlock); - } - } - - currentFragment->builder.createSelectionMerge( - mergeFragment->entryBlockId, {}); - - currentFragment->builder.createBranchConditional( - currentFragment->branchCondition, ifTrueFragment->entryBlockId, - ifFalseFragment->entryBlockId); - - releaseStateOf(currentFragment); - initState(mergeFragment); - - if (ifTrueLastBlock != nullptr) { - releaseStateOf(ifTrueLastBlock); - } - - if (ifFalseLastBlock != nullptr) { - releaseStateOf(ifFalseLastBlock); - } - currentFragment = mergeFragment; - continue; - } - - if (auto loop = dynCast(node)) { - auto headerFragment = function->createFragment(); - auto bodyFragment = function->createFragment(); - auto mergeFragment = function->createDetachedFragment(); - auto continueFragment = function->createDetachedFragment(); - - currentFragment->builder.createBranch(headerFragment->entryBlockId); - currentFragment->appendBranch(*headerFragment); - - initState(headerFragment); - releaseStateOf(currentFragment); - - headerFragment->builder.createLoopMerge( - mergeFragment->entryBlockId, continueFragment->entryBlockId, - spv::LoopControlMask::MaskNone, {}); - - headerFragment->builder.createBranch(bodyFragment->entryBlockId); - headerFragment->appendBranch(*bodyFragment); - - auto bodyLastBlock = - convertBlock(loop->body, bodyFragment, mergeFragment); - - if (bodyLastBlock != nullptr) { - if (bodyLastBlock->registers == nullptr) { - initState(bodyLastBlock); - } - - bodyLastBlock->builder.createBranch(continueFragment->entryBlockId); - bodyLastBlock->appendBranch(*continueFragment); - } - - continueFragment->builder.createBranch(headerFragment->entryBlockId); - continueFragment->appendBranch(*headerFragment); - initState(continueFragment); - - releaseStateOf(headerFragment); - initState(mergeFragment); - - if (bodyLastBlock != nullptr) { - releaseStateOf(bodyLastBlock); - } - - function->appendFragment(continueFragment); - function->appendFragment(mergeFragment); - releaseStateOf(continueFragment); - - currentFragment = mergeFragment; - continue; - } - - if (dynCast(node)) { - auto jumpAddress = currentFragment->jumpAddress; - - std::printf("jump to %lx\n", jumpAddress); - std::fflush(stdout); - - if (jumpAddress == 0) { - util::unreachable("no jump register on unknown block"); - } - - auto block = buildCf(cfContext, memory, jumpAddress); - auto basicBlockPrinter = [this](const scf::PrintOptions &opts, - unsigned depth, scf::BasicBlock *bb) { - printInstructions(opts, depth, - memory.getPointer(bb->getAddress()), - bb->getSize()); - }; - auto scfBlock = scf::structurize(*scfContext, block); - scfBlock->print({.blockPrinter = basicBlockPrinter}, 0); - std::fflush(stdout); - - auto targetFragment = function->createFragment(); - currentFragment->builder.createBranch(targetFragment->entryBlockId); - currentFragment->appendBranch(*targetFragment); - auto result = convertBlock(scfBlock, targetFragment, nullptr); - - if (currentFragment->registers == nullptr) { - initState(targetFragment); - releaseStateOf(currentFragment); - } - - return result; - } - - if (dynCast(node)) { - currentFragment->appendBranch(function->exitFragment); - currentFragment->builder.createBranch( - function->exitFragment.entryBlockId); - currentFragment->hasTerminator = true; - return nullptr; - } - - node->dump(); - util::unreachable(); - } - - return currentFragment != nullptr ? currentFragment : rootFragment; - } -}; -}; // namespace amdgpu::shader - -amdgpu::shader::Shader -amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry, - std::span userSpgrs, - std::uint32_t dimX, std::uint32_t dimY, - std::uint32_t dimZ, - util::MemoryAreaTable<> &dependencies) { - ConverterContext ctxt(memory, stage, &dependencies); - auto &builder = ctxt.getBuilder(); - builder.createCapability(spv::Capability::Shader); - builder.createCapability(spv::Capability::ImageQuery); - builder.createCapability(spv::Capability::ImageBuffer); - builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess); - builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - builder.createCapability(spv::Capability::Int64); - builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat); - builder.createCapability(spv::Capability::StorageImageReadWithoutFormat); - builder.setMemoryModel(spv::AddressingModel::Logical, - spv::MemoryModel::GLSL450); - - scf::Context scfContext; - scf::Block *entryBlock = nullptr; - { - cf::Context cfContext; - auto entryBB = buildCf(cfContext, memory, entry); - entryBlock = scf::structurize(scfContext, entryBB); - } - - // std::printf("========== stage: %u, user sgprs: %zu\n", (unsigned)stage, - // userSpgrs.size()); - // std::printf("structurized CFG:\n"); - - // auto basicBlockPrinter = [memory](const scf::PrintOptions &opts, - // unsigned depth, scf::BasicBlock *bb) { - // printInstructions(opts, depth, - // memory.getPointer(bb->getAddress()), - // bb->getSize()); - // }; - - // entryBlock->print({.blockPrinter = basicBlockPrinter}, 0); - // std::printf("==========\n"); - - auto mainFunction = ctxt.createFunction(0); - mainFunction->userSgprs = userSpgrs; - mainFunction->stage = stage; - - Converter converter; - converter.convertFunction(memory, &scfContext, entryBlock, mainFunction); - - Shader result; - - std::fflush(stdout); - mainFunction->exitFragment.outputs.clear(); - - std::size_t samplerCount = 0; - std::size_t imageCount = 0; - std::size_t storageImageCount = 0; - std::size_t bufferCount = 0; - - for (auto &uniform : ctxt.getUniforms()) { - auto &newUniform = result.uniforms.emplace_back(); - - for (int i = 0; i < 8; ++i) { - newUniform.buffer[i] = uniform.buffer[i]; - } - - std::uint32_t descriptorSet = 0; - - switch (uniform.typeId) { - case TypeId::Sampler: - newUniform.kind = Shader::UniformKind::Sampler; - newUniform.binding = - UniformBindings::getSamplerBinding(stage, samplerCount++); - break; - case TypeId::StorageImage2D: - newUniform.kind = Shader::UniformKind::StorageImage; - newUniform.binding = - UniformBindings::getStorageImageBinding(stage, storageImageCount++); - break; - case TypeId::Image2D: - newUniform.kind = Shader::UniformKind::Image; - newUniform.binding = - UniformBindings::getImageBinding(stage, imageCount++); - break; - default: - newUniform.kind = Shader::UniformKind::Buffer; - newUniform.binding = - UniformBindings::getBufferBinding(stage, bufferCount++); - break; - } - - ctxt.getBuilder().createDecorate( - uniform.variable, spv::Decoration::DescriptorSet, {{descriptorSet}}); - ctxt.getBuilder().createDecorate(uniform.variable, spv::Decoration::Binding, - {{newUniform.binding}}); - - newUniform.accessOp = uniform.accessOp; - } - - mainFunction->insertReturn(); - - for (auto frag : mainFunction->fragments) { - mainFunction->builder.insertBlock(frag->builder); - } - - mainFunction->builder.insertBlock(mainFunction->exitFragment.builder); - - builder.insertFunction(mainFunction->builder, mainFunction->getResultType(), - spv::FunctionControlMask::MaskNone, - mainFunction->getFunctionType()); - - if (stage == Stage::Vertex) { - builder.createEntryPoint(spv::ExecutionModel::Vertex, - mainFunction->builder.id, "main", - ctxt.getInterfaces()); - } else if (stage == Stage::Fragment) { - builder.createEntryPoint(spv::ExecutionModel::Fragment, - mainFunction->builder.id, "main", - ctxt.getInterfaces()); - builder.createExecutionMode(mainFunction->builder.id, - spv::ExecutionMode::OriginUpperLeft, {}); - } else if (stage == Stage::Compute) { - builder.createEntryPoint(spv::ExecutionModel::GLCompute, - mainFunction->builder.id, "main", - ctxt.getInterfaces()); - builder.createExecutionMode(mainFunction->builder.id, - spv::ExecutionMode::LocalSize, - {{dimX, dimY, dimZ}}); - } - - // auto maxId = ctxt.getBuilder().getIdGenerator()->bounds; - // for (std::size_t i = 1; i < maxId; ++i) { - // spirv::Id id; - // id.id = i; - // if (builder.isIdDefined(id) && !builder.isIdUsed(id)) { - // std::printf("ssa variable %%%zu defined, but not used\n", i); - // } - // } - result.spirv = builder.build(SPV_VERSION, 0); - return result; -} diff --git a/hw/amdgpu/shader/src/ConverterContext.cpp b/hw/amdgpu/shader/src/ConverterContext.cpp deleted file mode 100644 index 0dc316a..0000000 --- a/hw/amdgpu/shader/src/ConverterContext.cpp +++ /dev/null @@ -1,572 +0,0 @@ -#include "ConverterContext.hpp" -#include "util/unreachable.hpp" -using namespace amdgpu::shader; - -std::optional ConverterContext::getTypeIdOf(spirv::Type type) const { - for (int i = 0; i < kGenericTypesCount; ++i) { - if (mTypes[i] == type) { - return static_cast(i); - } - } - - return std::nullopt; -} - -spirv::StructType -ConverterContext::findStructType(std::span members) { - for (auto &structType : mStructTypes) { - if (structType.match(members)) { - return structType.id; - } - } - - return {}; -} - -spirv::StructType -ConverterContext::getStructType(std::span members) { - for (auto &structType : mStructTypes) { - if (structType.match(members)) { - return structType.id; - } - } - - auto &newType = mStructTypes.emplace_back(); - newType.id = mBuilder.createTypeStruct(members); - newType.members.reserve(members.size()); - for (auto member : members) { - newType.members.push_back(member); - } - return newType.id; -} - -spirv::PointerType -ConverterContext::getStructPointerType(spv::StorageClass storageClass, - spirv::StructType structType) { - StructTypeEntry *entry = nullptr; - for (auto &type : mStructTypes) { - if (type.id != structType) { - continue; - } - - entry = &type; - break; - } - - if (entry == nullptr) { - util::unreachable("Struct type not found"); - } - - auto &ptrType = entry->ptrTypes[static_cast(storageClass)]; - - if (!ptrType) { - ptrType = mBuilder.createTypePointer(storageClass, structType); - } - - return ptrType; -} - -spirv::Type ConverterContext::getType(TypeId id) { - auto &type = mTypes[static_cast(id)]; - - if (type) { - return type; - } - - switch (id) { - case TypeId::Void: - return ((type = mBuilder.createTypeVoid())); - case TypeId::Bool: - return ((type = mBuilder.createTypeBool())); - case TypeId::SInt8: - return ((type = mBuilder.createTypeSInt(8))); - case TypeId::UInt8: - return ((type = mBuilder.createTypeUInt(8))); - case TypeId::SInt16: - return ((type = mBuilder.createTypeSInt(16))); - case TypeId::UInt16: - return ((type = mBuilder.createTypeUInt(16))); - case TypeId::SInt32: - return ((type = mBuilder.createTypeSInt(32))); - case TypeId::UInt32: - return ((type = mBuilder.createTypeUInt(32))); - case TypeId::UInt32x2: - return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 2))); - case TypeId::UInt32x3: - return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 3))); - case TypeId::UInt32x4: - return ((type = mBuilder.createTypeVector(getType(TypeId::UInt32), 4))); - case TypeId::UInt64: - return ((type = mBuilder.createTypeUInt(64))); - case TypeId::SInt64: - return ((type = mBuilder.createTypeSInt(64))); - case TypeId::ArrayUInt32x8: - type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(2)); - getBuilder().createDecorate(type, spv::Decoration::ArrayStride, - std::array{static_cast(16)}); - case TypeId::ArrayUInt32x16: - type = mBuilder.createTypeArray(getType(TypeId::UInt32x4), getUInt32(4)); - getBuilder().createDecorate(type, spv::Decoration::ArrayStride, - std::array{static_cast(16)}); - return type; - case TypeId::Float16: - return ((type = mBuilder.createTypeFloat(16))); - case TypeId::Float32: - return ((type = mBuilder.createTypeFloat(32))); - case TypeId::Float32x2: - return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 2))); - case TypeId::Float32x3: - return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 3))); - case TypeId::Float32x4: - return ((type = mBuilder.createTypeVector(getType(TypeId::Float32), 4))); - case TypeId::Float64: - return ((type = mBuilder.createTypeFloat(64))); - case TypeId::ArrayFloat32x8: - type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(2)); - getBuilder().createDecorate(type, spv::Decoration::ArrayStride, - std::array{static_cast(16)}); - return type; - case TypeId::ArrayFloat32x16: - type = mBuilder.createTypeArray(getType(TypeId::Float32x4), getUInt32(4)); - getBuilder().createDecorate(type, spv::Decoration::ArrayStride, - std::array{static_cast(16)}); - return type; - - case TypeId::Image2D: - return ((type = getBuilder().createTypeImage(getFloat32Type(), - spv::Dim::Dim2D, 0, 0, 0, 1, - spv::ImageFormat::Unknown))); - case TypeId::StorageImage2D: - return ((type = getBuilder().createTypeImage(getFloat32Type(), - spv::Dim::Dim2D, 0, 0, 0, 2, - spv::ImageFormat::Unknown))); - case TypeId::SampledImage2D: - return ((type = getBuilder().createTypeSampledImage(getImage2DType()))); - - case TypeId::Sampler: - return ((type = getBuilder().createTypeSampler())); - } - - util::unreachable(); -} - -spirv::RuntimeArrayType ConverterContext::getRuntimeArrayType(TypeId id) { - auto &type = mRuntimeArrayTypes[static_cast(id)]; - - if (!type) { - type = mBuilder.createTypeRuntimeArray(getType(id)); - mBuilder.createDecorate(type, spv::Decoration::ArrayStride, - {{(std::uint32_t)id.getSize()}}); - } - - return type; -} - -spirv::ConstantUInt ConverterContext::getUInt64(std::uint64_t value) { - auto &id = mConstantUint64Map[value]; - if (!id) { - id = mBuilder.createConstant64(getUInt64Type(), value); - } - return id; -} - -spirv::ConstantUInt ConverterContext::getUInt32(std::uint32_t value) { - auto &id = mConstantUint32Map[value]; - if (!id) { - id = mBuilder.createConstant32(getUInt32Type(), value); - } - return id; -} - -spirv::ConstantSInt ConverterContext::getSInt32(std::uint32_t value) { - auto &id = mConstantSint32Map[value]; - if (!id) { - id = mBuilder.createConstant32(getSint32Type(), value); - } - return id; -} - -spirv::ConstantFloat ConverterContext::getFloat32Raw(std::uint32_t value) { - auto &id = mConstantFloat32Map[value]; - if (!id) { - id = mBuilder.createConstant32(getFloat32Type(), value); - } - return id; -} - -UniformInfo *ConverterContext::createStorageBuffer(TypeId type) { - std::array uniformStructMembers{getRuntimeArrayType(type)}; - auto uniformStruct = findStructType(uniformStructMembers); - - if (!uniformStruct) { - uniformStruct = getStructType(uniformStructMembers); - - getBuilder().createDecorate(uniformStruct, spv::Decoration::Block, {}); - - getBuilder().createMemberDecorate( - uniformStruct, 0, spv::Decoration::Offset, - std::array{static_cast(0)}); - } - - auto uniformType = - getStructPointerType(spv::StorageClass::StorageBuffer, uniformStruct); - auto uniformVariable = getBuilder().createVariable( - uniformType, spv::StorageClass::StorageBuffer); - - mInterfaces.push_back(uniformVariable); - - auto &newUniform = mUniforms.emplace_back(); - newUniform.index = mUniforms.size() - 1; - newUniform.typeId = type; - newUniform.type = uniformType; - newUniform.variable = uniformVariable; - newUniform.isBuffer = true; - std::printf("new storage buffer %u of type %u\n", newUniform.index, - newUniform.typeId.raw); - return &newUniform; -} - -UniformInfo *ConverterContext::getOrCreateStorageBuffer(std::uint32_t *vbuffer, - TypeId type) { - for (auto &uniform : mUniforms) { - if (std::memcmp(uniform.buffer, vbuffer, sizeof(std::uint32_t) * 4)) { - continue; - } - - if (uniform.typeId != type) { - util::unreachable("getOrCreateStorageBuffer: access to the uniform with " - "different type"); - } - - if (!uniform.isBuffer) { - util::unreachable("getOrCreateStorageBuffer: uniform was constant"); - } - - // std::printf("reuse storage buffer %u of type %u\n", uniform.index, - // uniform.typeId.raw); - return &uniform; - } - - auto newUniform = createStorageBuffer(type); - std::memcpy(newUniform->buffer, vbuffer, sizeof(std::uint32_t) * 4); - return newUniform; -} - -UniformInfo *ConverterContext::getOrCreateUniformConstant(std::uint32_t *buffer, - std::size_t size, - TypeId type) { - for (auto &uniform : mUniforms) { - if (std::memcmp(uniform.buffer, buffer, sizeof(std::uint32_t) * size)) { - continue; - } - - if (uniform.typeId != type) { - util::unreachable( - "getOrCreateUniformConstant: access to the uniform with " - "different type"); - } - - if (uniform.isBuffer) { - util::unreachable("getOrCreateUniformConstant: uniform was buffer"); - } - - return &uniform; - } - - auto uniformType = getPointerType(spv::StorageClass::UniformConstant, type); - auto uniformVariable = getBuilder().createVariable( - uniformType, spv::StorageClass::UniformConstant); - mInterfaces.push_back(uniformVariable); - - auto &newUniform = mUniforms.emplace_back(); - newUniform.index = mUniforms.size() - 1; - newUniform.typeId = type; - newUniform.type = uniformType; - newUniform.variable = uniformVariable; - newUniform.isBuffer = false; - std::memcpy(newUniform.buffer, buffer, sizeof(std::uint32_t) * size); - - return &newUniform; -} - -spirv::VariableValue ConverterContext::getThreadId() { - if (mThreadId) { - return mThreadId; - } - - auto inputType = getPointerType(spv::StorageClass::Input, TypeId::UInt32); - mThreadId = mBuilder.createVariable(inputType, spv::StorageClass::Input); - - if (mStage == Stage::Vertex) { - mBuilder.createDecorate( - mThreadId, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::VertexIndex)}); - } else { - util::unreachable(); - } - - mInterfaces.push_back(mThreadId); - - return mThreadId; -} - -spirv::VariableValue ConverterContext::getWorkgroupId() { - if (mWorkgroupId) { - return mWorkgroupId; - } - - if (mStage != Stage::Compute) { - util::unreachable(); - } - - auto workgroupIdType = - getPointerType(spv::StorageClass::Input, TypeId::UInt32x3); - mWorkgroupId = - mBuilder.createVariable(workgroupIdType, spv::StorageClass::Input); - - mBuilder.createDecorate( - mWorkgroupId, spv::Decoration::BuiltIn, - {{static_cast(spv::BuiltIn::WorkgroupId)}}); - mInterfaces.push_back(mWorkgroupId); - - return mWorkgroupId; -} - -spirv::VariableValue ConverterContext::getLocalInvocationId() { - if (mLocalInvocationId) { - return mLocalInvocationId; - } - - if (mStage != Stage::Compute) { - util::unreachable(); - } - - auto localInvocationIdType = - getPointerType(spv::StorageClass::Input, TypeId::UInt32x3); - mLocalInvocationId = - mBuilder.createVariable(localInvocationIdType, spv::StorageClass::Input); - - mBuilder.createDecorate( - mLocalInvocationId, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::LocalInvocationId)}); - - mInterfaces.push_back(mLocalInvocationId); - - return mLocalInvocationId; -} - -spirv::VariableValue ConverterContext::getPerVertex() { - if (mPerVertex) { - return mPerVertex; - } - - auto floatT = getFloat32Type(); - auto float4T = getFloat32x4Type(); - - auto uintConst1 = getUInt32(1); - auto arr1Float = mBuilder.createTypeArray(floatT, uintConst1); - - auto gl_PerVertexStructT = mBuilder.createTypeStruct(std::array{ - static_cast(float4T), - static_cast(floatT), - static_cast(arr1Float), - static_cast(arr1Float), - }); - - mBuilder.createDecorate(gl_PerVertexStructT, spv::Decoration::Block, {}); - mBuilder.createMemberDecorate( - gl_PerVertexStructT, 0, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::Position)}); - mBuilder.createMemberDecorate( - gl_PerVertexStructT, 1, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::PointSize)}); - mBuilder.createMemberDecorate( - gl_PerVertexStructT, 2, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::ClipDistance)}); - mBuilder.createMemberDecorate( - gl_PerVertexStructT, 3, spv::Decoration::BuiltIn, - std::array{static_cast(spv::BuiltIn::CullDistance)}); - - auto gl_PerVertexPtrT = mBuilder.createTypePointer(spv::StorageClass::Output, - gl_PerVertexStructT); - mPerVertex = - mBuilder.createVariable(gl_PerVertexPtrT, spv::StorageClass::Output); - - mInterfaces.push_back(mPerVertex); - return mPerVertex; -} - -spirv::VariableValue ConverterContext::getFragCoord() { - if (mFragCoord) { - return mFragCoord; - } - - auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4); - mFragCoord = mBuilder.createVariable(inputType, spv::StorageClass::Input); - - mBuilder.createDecorate( - mFragCoord, spv::Decoration::BuiltIn, - {{static_cast(spv::BuiltIn::FragCoord)}}); - - mInterfaces.push_back(mFragCoord); - return mFragCoord; -} - -spirv::VariableValue ConverterContext::getIn(unsigned location) { - auto [it, inserted] = mIns.try_emplace(location); - if (!inserted) { - return it->second; - } - - auto inputType = getPointerType(spv::StorageClass::Input, TypeId::Float32x4); - auto inputVariable = - mBuilder.createVariable(inputType, spv::StorageClass::Input); - - mBuilder.createDecorate(inputVariable, spv::Decoration::Location, - {{location}}); - - mInterfaces.push_back(inputVariable); - it->second = inputVariable; - return inputVariable; -} - -spirv::VariableValue ConverterContext::getOut(unsigned location) { - auto [it, inserted] = mOuts.try_emplace(location); - if (!inserted) { - return it->second; - } - auto outputType = - getPointerType(spv::StorageClass::Output, TypeId::Float32x4); - auto outputVariable = - mBuilder.createVariable(outputType, spv::StorageClass::Output); - - mBuilder.createDecorate(outputVariable, spv::Decoration::Location, - {{location}}); - - mInterfaces.push_back(outputVariable); - it->second = outputVariable; - return outputVariable; -} - -spirv::Function ConverterContext::getDiscardFn() { - if (mDiscardFn) { - return mDiscardFn; - } - - if (mStage != Stage::Fragment) { - util::unreachable(); - } - - auto fn = mBuilder.createFunctionBuilder(5); - mDiscardFn = fn.id; - auto entry = fn.createBlockBuilder(5); - entry.createKill(); - - fn.insertBlock(entry); - mBuilder.insertFunction(fn, getVoidType(), {}, - getFunctionType(getVoidType(), {})); - - return mDiscardFn; -} - -std::optional -ConverterContext::findUint32Value(spirv::Value id) const { - for (auto [value, constId] : mConstantUint32Map) { - if (constId == id) { - return value; - } - } - - return std::nullopt; -} - -std::optional -ConverterContext::findSint32Value(spirv::Value id) const { - for (auto [value, constId] : mConstantSint32Map) { - if (constId == id) { - return value; - } - } - - return std::nullopt; -} - -std::optional ConverterContext::findFloat32Value(spirv::Value id) const { - for (auto [value, constId] : mConstantFloat32Map) { - if (constId == id) { - return std::bit_cast(value); - } - } - - return std::nullopt; -} - -spirv::FunctionType -ConverterContext::getFunctionType(spirv::Type resultType, - std::span params) { - for (auto fnType : mFunctionTypes) { - if (fnType.resultType != resultType) { - continue; - } - - if (fnType.params.size() != params.size()) { - continue; - } - - bool match = true; - for (std::size_t i = 0, end = params.size(); i < end; ++i) { - if (fnType.params[i] != params[i]) { - match = false; - break; - } - } - if (!match) { - continue; - } - - return fnType.id; - } - - auto id = mBuilder.createTypeFunction(resultType, params); - - std::vector paramsVec; - paramsVec.reserve(params.size()); - - for (auto param : params) { - paramsVec.push_back(param); - } - - mFunctionTypes.push_back(FunctionType{ - .resultType = resultType, .params = std::move(paramsVec), .id = id}); - - return id; -} - -Function *ConverterContext::createFunction(std::size_t expectedSize) { - auto result = &mFunctions.emplace_front(); - - result->context = this; - result->entryFragment.context = this; - result->entryFragment.function = result; - result->entryFragment.builder = mBuilder.createBlockBuilder(expectedSize); - result->entryFragment.entryBlockId = result->entryFragment.builder.id; - result->fragments.push_back(&result->entryFragment); - - result->exitFragment.context = this; - result->exitFragment.function = result; - result->exitFragment.builder = mBuilder.createBlockBuilder(0); - result->exitFragment.entryBlockId = result->exitFragment.builder.id; - result->builder = mBuilder.createFunctionBuilder(expectedSize); - - return result; -} - -Fragment *ConverterContext::createFragment(std::size_t expectedSize) { - auto result = &mFragments.emplace_front(); - - result->context = this; - result->builder = mBuilder.createBlockBuilder(expectedSize); - result->entryBlockId = result->builder.id; - - return result; -} diff --git a/hw/amdgpu/shader/src/Fragment.cpp b/hw/amdgpu/shader/src/Fragment.cpp deleted file mode 100644 index d0a5b8a..0000000 --- a/hw/amdgpu/shader/src/Fragment.cpp +++ /dev/null @@ -1,6116 +0,0 @@ -#include "Fragment.hpp" -#include "ConverterContext.hpp" -#include "Instruction.hpp" -#include "RegisterId.hpp" -#include "RegisterState.hpp" - -#include -#include -#include -#include - -#include -#include - -using namespace amdgpu::shader; - -namespace { -std::uint32_t getChannelsCount(SurfaceFormat format) { - switch (format) { - case kSurfaceFormat8: - return 1; - case kSurfaceFormat16: - return 1; - case kSurfaceFormat8_8: - return 2; - case kSurfaceFormat32: - return 1; - case kSurfaceFormat16_16: - return 2; - case kSurfaceFormat10_11_11: - return 3; - case kSurfaceFormat11_11_10: - return 3; - case kSurfaceFormat10_10_10_2: - return 4; - case kSurfaceFormat2_10_10_10: - return 4; - case kSurfaceFormat8_8_8_8: - return 4; - case kSurfaceFormat32_32: - return 2; - case kSurfaceFormat16_16_16_16: - return 4; - case kSurfaceFormat32_32_32: - return 3; - case kSurfaceFormat32_32_32_32: - return 4; - default: - util::unreachable(); - } -} - -std::uint32_t sizeOfFormat(SurfaceFormat format) { - switch (format) { - case kSurfaceFormat8: - return 8; - case kSurfaceFormat16: - return 16; - case kSurfaceFormat8_8: - return 16; - case kSurfaceFormat32: - return 32; - case kSurfaceFormat16_16: - return 32; - case kSurfaceFormat10_11_11: - return 32; - case kSurfaceFormat11_11_10: - return 32; - case kSurfaceFormat10_10_10_2: - return 32; - case kSurfaceFormat2_10_10_10: - return 32; - case kSurfaceFormat8_8_8_8: - return 32; - case kSurfaceFormat32_32: - return 64; - case kSurfaceFormat16_16_16_16: - return 64; - case kSurfaceFormat32_32_32: - return 96; - case kSurfaceFormat32_32_32_32: - return 128; - default: - util::unreachable("unsupported format %u", format); - } -} - -TypeId pickBufferType(SurfaceFormat surfaceFormat, - TextureChannelType channelType) { - auto size = sizeOfFormat(surfaceFormat) / getChannelsCount(surfaceFormat); - - if (size == 8) { - switch (channelType) { - case kTextureChannelTypeUNorm: - case kTextureChannelTypeUScaled: - case kTextureChannelTypeUInt: - return TypeId::UInt8; - - default: - return TypeId::SInt8; - } - } - - if (size == 16) { - switch (channelType) { - case kTextureChannelTypeUNorm: - case kTextureChannelTypeUScaled: - case kTextureChannelTypeUInt: - return TypeId::UInt16; - - case kTextureChannelTypeFloat: - return TypeId::Float16; - - default: - return TypeId::SInt16; - } - } - - if (size == 32) { - switch (channelType) { - case kTextureChannelTypeUNorm: - case kTextureChannelTypeUScaled: - case kTextureChannelTypeUInt: - return TypeId::UInt32; - - case kTextureChannelTypeFloat: - return TypeId::Float32; - - default: - return TypeId::SInt32; - } - } - - if (size == 64) { - switch (channelType) { - case kTextureChannelTypeUNorm: - case kTextureChannelTypeUScaled: - case kTextureChannelTypeUInt: - return TypeId::UInt64; - - case kTextureChannelTypeFloat: - return TypeId::Float64; - - default: - return TypeId::SInt64; - } - } - - util::unreachable(); -} - -spirv::Type convertFromFormat(spirv::Value *result, int count, - Fragment &fragment, std::uint32_t *vBufferData, - spirv::UIntValue offset, - SurfaceFormat surfaceFormat, - TextureChannelType channelType) { - auto loadType = pickBufferType(surfaceFormat, channelType); - - auto uniform = - fragment.context->getOrCreateStorageBuffer(vBufferData, loadType); - uniform->accessOp |= AccessOp::Load; - - auto storageBufferPointerType = fragment.context->getPointerType( - spv::StorageClass::StorageBuffer, loadType); - - auto &builder = fragment.builder; - - switch (surfaceFormat) { - case kSurfaceFormat32: - case kSurfaceFormat32_32: - case kSurfaceFormat32_32_32: - case kSurfaceFormat32_32_32_32: - case kSurfaceFormat16: - case kSurfaceFormat16_16: - case kSurfaceFormat16_16_16_16: - case kSurfaceFormat8: - case kSurfaceFormat8_8: - case kSurfaceFormat8_8_8_8: { - // format not requires bit fetching - auto totalChannelsCount = getChannelsCount(surfaceFormat); - auto channelSize = sizeOfFormat(surfaceFormat) / 8 / totalChannelsCount; - auto channelsCount = std::min(count, totalChannelsCount); - - if (channelSize != 1) { - offset = builder.createUDiv(fragment.context->getUInt32Type(), offset, - fragment.context->getUInt32(channelSize)); - } - - int channel = 0; - auto resultType = fragment.context->getType(loadType); - for (; channel < channelsCount; ++channel) { - auto channelOffset = offset; - - if (channel != 0) { - channelOffset = - builder.createIAdd(fragment.context->getUInt32Type(), channelOffset, - fragment.context->getUInt32(channel)); - } - - auto uniformPointerValue = fragment.builder.createAccessChain( - storageBufferPointerType, uniform->variable, - {{fragment.context->getUInt32(0), channelOffset}}); - - auto channelValue = fragment.builder.createLoad( - fragment.context->getType(loadType), uniformPointerValue); - - switch (channelType) { - case kTextureChannelTypeFloat: { - if (loadType != TypeId::Float32) { - channelValue = fragment.builder.createFConvert( - fragment.context->getFloat32Type(), channelValue); - - resultType = fragment.context->getFloat32Type(); - } - - result[channel] = channelValue; - break; - } - case kTextureChannelTypeSInt: { - if (loadType != TypeId::SInt32) { - channelValue = fragment.builder.createSConvert( - fragment.context->getSint32Type(), - spirv::cast(channelValue)); - - resultType = fragment.context->getSint32Type(); - } - - result[channel] = channelValue; - break; - } - case kTextureChannelTypeUInt: - if (loadType != TypeId::UInt32) { - channelValue = fragment.builder.createUConvert( - fragment.context->getUInt32Type(), - spirv::cast(channelValue)); - - resultType = fragment.context->getUInt32Type(); - } - - result[channel] = channelValue; - break; - - case kTextureChannelTypeUNorm: { - auto maxValue = - (static_cast(1) << (channelSize * 8)) - 1; - - auto uintChannelValue = spirv::cast(channelValue); - - if (loadType != TypeId::UInt32) { - uintChannelValue = builder.createUConvert( - fragment.context->getUInt32Type(), uintChannelValue); - } - - auto floatChannelValue = builder.createConvertUToF( - fragment.context->getFloat32Type(), uintChannelValue); - floatChannelValue = builder.createFDiv( - fragment.context->getFloat32Type(), floatChannelValue, - fragment.context->getFloat32(maxValue)); - result[channel] = floatChannelValue; - resultType = fragment.context->getFloat32Type(); - break; - } - - case kTextureChannelTypeSNorm: { - auto maxValue = - (static_cast(1) << (channelSize * 8 - 1)) - 1; - - auto uintChannelValue = spirv::cast(channelValue); - - if (loadType != TypeId::SInt32) { - uintChannelValue = builder.createSConvert( - fragment.context->getSint32Type(), uintChannelValue); - } - - auto floatChannelValue = builder.createConvertSToF( - fragment.context->getFloat32Type(), uintChannelValue); - - floatChannelValue = builder.createFDiv( - fragment.context->getFloat32Type(), floatChannelValue, - fragment.context->getFloat32(maxValue)); - - auto glslStd450 = fragment.context->getGlslStd450(); - floatChannelValue = - spirv::cast(fragment.builder.createExtInst( - fragment.context->getFloat32Type(), glslStd450, - GLSLstd450FClamp, - {{floatChannelValue, fragment.context->getFloat32(-1), - fragment.context->getFloat32(1)}})); - result[channel] = floatChannelValue; - resultType = fragment.context->getFloat32Type(); - break; - } - - case kTextureChannelTypeUScaled: { - auto uintChannelValue = spirv::cast(channelValue); - - if (loadType != TypeId::UInt32) { - uintChannelValue = builder.createUConvert( - fragment.context->getUInt32Type(), uintChannelValue); - } - - auto floatChannelValue = builder.createConvertUToF( - fragment.context->getFloat32Type(), uintChannelValue); - - result[channel] = floatChannelValue; - resultType = fragment.context->getFloat32Type(); - break; - } - - case kTextureChannelTypeSScaled: { - auto uintChannelValue = spirv::cast(channelValue); - - if (loadType != TypeId::SInt32) { - uintChannelValue = builder.createSConvert( - fragment.context->getSint32Type(), uintChannelValue); - } - - auto floatChannelValue = builder.createConvertSToF( - fragment.context->getFloat32Type(), uintChannelValue); - - result[channel] = floatChannelValue; - resultType = fragment.context->getFloat32Type(); - break; - } - - case kTextureChannelTypeSNormNoZero: { - auto maxValue = - (static_cast(1) << (channelSize * 8)) - 1; - - auto uintChannelValue = spirv::cast(channelValue); - - if (loadType != TypeId::SInt32) { - uintChannelValue = builder.createSConvert( - fragment.context->getSint32Type(), uintChannelValue); - } - - auto floatChannelValue = builder.createConvertSToF( - fragment.context->getFloat32Type(), uintChannelValue); - - floatChannelValue = builder.createFMul( - fragment.context->getFloat32Type(), floatChannelValue, - fragment.context->getFloat32(2)); - floatChannelValue = builder.createFAdd( - fragment.context->getFloat32Type(), floatChannelValue, - fragment.context->getFloat32(1)); - - floatChannelValue = builder.createFDiv( - fragment.context->getFloat32Type(), floatChannelValue, - fragment.context->getFloat32(maxValue)); - - result[channel] = floatChannelValue; - resultType = fragment.context->getFloat32Type(); - break; - } - - default: - util::unreachable("unimplemented channel type %u", channelType); - } - } - - // for (; channel < count; ++channel) { - // result[channel] = fragment.createBitcast( - // resultType, fragment.context->getUInt32Type(), - // fragment.context->getUInt32(0)); - // } - return resultType; - } - - default: - break; - } - - util::unreachable("unimplemented conversion type. %u.%u", surfaceFormat, - channelType); -} - -void convertToFormat(RegisterId sourceRegister, int count, Fragment &fragment, - std::uint32_t *vBufferData, spirv::UIntValue offset, - SurfaceFormat surfaceFormat, - TextureChannelType channelType) { - - auto storeType = pickBufferType(surfaceFormat, channelType); - - auto uniform = - fragment.context->getOrCreateStorageBuffer(vBufferData, storeType); - uniform->accessOp |= AccessOp::Store; - - auto uniformPointerType = fragment.context->getPointerType( - spv::StorageClass::StorageBuffer, storeType); - - auto &builder = fragment.builder; - switch (surfaceFormat) { - case kSurfaceFormat8: - case kSurfaceFormat8_8: - case kSurfaceFormat8_8_8_8: - case kSurfaceFormat16: - case kSurfaceFormat16_16: - case kSurfaceFormat16_16_16_16: - case kSurfaceFormat32: - case kSurfaceFormat32_32: - case kSurfaceFormat32_32_32: - case kSurfaceFormat32_32_32_32: { - // format not requires bit fetching - auto totalChannelsCount = getChannelsCount(surfaceFormat); - auto channelSize = sizeOfFormat(surfaceFormat) / 8 / totalChannelsCount; - auto channelsCount = std::min(count, totalChannelsCount); - - if (channelSize != 1) { - offset = builder.createUDiv(fragment.context->getUInt32Type(), offset, - fragment.context->getUInt32(channelSize)); - } - - int channel = 0; - - for (; channel < channelsCount; ++channel) { - auto channelOffset = offset; - - if (channel != 0) { - channelOffset = - builder.createIAdd(fragment.context->getUInt32Type(), channelOffset, - fragment.context->getUInt32(channel)); - } - - auto uniformPointerValue = fragment.builder.createAccessChain( - uniformPointerType, uniform->variable, - {{fragment.context->getUInt32(0), channelOffset}}); - - spirv::Value channelValue; - - switch (channelType) { - case kTextureChannelTypeUNorm: { - channelValue = - fragment - .getOperand(RegisterId::Raw(sourceRegister + channel), - TypeId::Float32) - .value; - - auto maxValue = - (static_cast(1) << (channelSize * 8)) - 1; - - channelValue = - builder.createFMul(fragment.context->getFloat32Type(), - spirv::cast(channelValue), - fragment.context->getFloat32(maxValue)); - - channelValue = builder.createConvertFToU( - fragment.context->getType(TypeId::UInt32), channelValue); - - if (storeType != TypeId::UInt32) { - channelValue = builder.createUConvert( - fragment.context->getType(storeType), - spirv::cast(channelValue)); - } - break; - } - case kTextureChannelTypeFloat: - channelValue = - fragment - .getOperand(RegisterId::Raw(sourceRegister + channel), - TypeId::Float32) - .value; - - if (storeType != TypeId::Float32) { - channelValue = fragment.builder.createFConvert( - fragment.context->getType(storeType), channelValue); - } - break; - - case kTextureChannelTypeSInt: - channelValue = - fragment - .getOperand(RegisterId::Raw(sourceRegister + channel), - TypeId::SInt32) - .value; - - if (storeType != TypeId::SInt32) { - channelValue = fragment.builder.createSConvert( - fragment.context->getType(storeType), - spirv::cast(channelValue)); - } - break; - case kTextureChannelTypeUInt: - channelValue = - fragment - .getOperand(RegisterId::Raw(sourceRegister + channel), - TypeId::UInt32) - .value; - - if (storeType != TypeId::UInt32) { - channelValue = fragment.builder.createUConvert( - fragment.context->getType(storeType), - spirv::cast(channelValue)); - } - break; - - default: - util::unreachable("unimplemented channel type %u", channelType); - } - - fragment.builder.createStore(uniformPointerValue, channelValue); - } - - for (; channel < count; ++channel) { - auto channelOffset = - builder.createIAdd(fragment.context->getUInt32Type(), offset, - fragment.context->getUInt32(channel)); - auto uniformPointerValue = fragment.builder.createAccessChain( - uniformPointerType, uniform->variable, - {{fragment.context->getUInt32(0), channelOffset}}); - - fragment.builder.createStore( - uniformPointerValue, - fragment.createBitcast(fragment.context->getType(storeType), - fragment.context->getUInt32Type(), - fragment.context->getUInt32(0))); - } - - return; - } - - default: - break; - } - - util::unreachable("unimplemented conversion type. %u.%u", surfaceFormat, - channelType); -} - -struct GnmVBuffer { - uint64_t base : 44; - uint64_t mtype_L1s : 2; - uint64_t mtype_L2 : 2; - uint64_t stride : 14; - uint64_t cache_swizzle : 1; - uint64_t swizzle_en : 1; - - uint32_t num_records; - - uint32_t dst_sel_x : 3; - uint32_t dst_sel_y : 3; - uint32_t dst_sel_z : 3; - uint32_t dst_sel_w : 3; - - TextureChannelType nfmt : 3; - SurfaceFormat dfmt : 4; - uint32_t element_size : 2; - uint32_t index_stride : 2; - uint32_t addtid_en : 1; - uint32_t reserved0 : 1; - uint32_t hash_en : 1; - uint32_t reserved1 : 1; - uint32_t mtype : 3; - uint32_t type : 2; - - std::uint64_t getAddress() const { return base; } - - uint32_t getStride() const { return stride; } - - uint32_t getSize() const { - uint32_t stride = getStride(); - uint32_t numElements = getNumRecords(); - return stride ? numElements * stride : numElements; - } - - uint32_t getNumRecords() const { return num_records; } - uint32_t getElementSize() const { return element_size; } - uint32_t getIndexStrideSize() const { return index_stride; } - SurfaceFormat getSurfaceFormat() const { return (SurfaceFormat)dfmt; } - TextureChannelType getChannelType() const { return (TextureChannelType)nfmt; } -}; - -static_assert(sizeof(GnmVBuffer) == sizeof(std::uint64_t) * 2); - -enum class TextureType { - Dim1D = 8, - Dim2D, - Dim3D, - Cube, - Array1D, - Array2D, - Msaa2D, - MsaaArray2D, -}; - -struct GnmTBuffer { - uint64_t baseaddr256 : 38; - uint64_t mtype_L2 : 2; - uint64_t min_lod : 12; - SurfaceFormat dfmt : 6; - TextureChannelType nfmt : 4; - uint64_t mtype01 : 2; - - uint64_t width : 14; - uint64_t height : 14; - uint64_t perfMod : 3; - uint64_t interlaced : 1; - uint64_t dst_sel_x : 3; - uint64_t dst_sel_y : 3; - uint64_t dst_sel_z : 3; - uint64_t dst_sel_w : 3; - uint64_t base_level : 4; - uint64_t last_level : 4; - uint64_t tiling_idx : 5; - uint64_t pow2pad : 1; - uint64_t mtype2 : 1; - uint64_t : 1; // reserved - TextureType type : 4; - - uint64_t depth : 13; - uint64_t pitch : 14; - uint64_t : 5; // reserved - uint64_t base_array : 13; - uint64_t last_array : 13; - uint64_t : 6; // reserved - - uint64_t min_lod_warn : 12; // fixed point 4.8 - uint64_t counter_bank_id : 8; - uint64_t LOD_hdw_cnt_en : 1; - uint64_t : 42; // reserved - - std::uint64_t getAddress() const { - return static_cast(static_cast(baseaddr256)) - << 8; - } -}; - -static_assert(sizeof(GnmTBuffer) == sizeof(std::uint64_t) * 4); - -enum class CmpKind { - F, - LT, - EQ, - LE, - GT, - LG, - GE, - O, - U, - NGE, - NLG, - NGT, - NLE, - NEQ, - NLT, - NE, - TRU, - T = TRU, - CLASS -}; - -enum class CmpFlags { None = 0, X = 1 << 0, S = 1 << 1, SX = S | X }; -inline CmpFlags operator&(CmpFlags a, CmpFlags b) { - return static_cast(static_cast(a) & static_cast(b)); -} - -Value doCmpOp(Fragment &fragment, TypeId type, spirv::Value src0, - spirv::Value src1, CmpKind kind, CmpFlags flags, - std::uint8_t typeMask = 0) { - spirv::BoolValue cmp; - auto boolT = fragment.context->getBoolType(); - - switch (kind) { - case CmpKind::F: - cmp = fragment.context->getFalse(); - break; - case CmpKind::LT: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdLessThan(boolT, src0, src1); - } else if (type.isSignedInt()) { - cmp = fragment.builder.createSLessThan(boolT, src0, src1); - } else { - cmp = fragment.builder.createULessThan(boolT, src0, src1); - } - break; - case CmpKind::EQ: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdEqual(boolT, src0, src1); - } else { - cmp = fragment.builder.createIEqual(boolT, src0, src1); - } - break; - case CmpKind::LE: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdLessThanEqual(boolT, src0, src1); - } else if (type.isSignedInt()) { - cmp = fragment.builder.createSLessThanEqual(boolT, src0, src1); - } else { - cmp = fragment.builder.createULessThanEqual(boolT, src0, src1); - } - break; - case CmpKind::GT: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdGreaterThan(boolT, src0, src1); - } else if (type.isSignedInt()) { - cmp = fragment.builder.createSGreaterThan(boolT, src0, src1); - } else { - cmp = fragment.builder.createUGreaterThan(boolT, src0, src1); - } - break; - case CmpKind::LG: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdNotEqual(boolT, src0, src1); - } else { - cmp = fragment.builder.createINotEqual(boolT, src0, src1); - } - break; - case CmpKind::GE: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1); - } else if (type.isSignedInt()) { - cmp = fragment.builder.createSGreaterThanEqual(boolT, src0, src1); - } else { - cmp = fragment.builder.createUGreaterThanEqual(boolT, src0, src1); - } - break; - case CmpKind::O: - cmp = fragment.builder.createLogicalAnd( - boolT, fragment.builder.createFOrdEqual(boolT, src0, src0), - fragment.builder.createFOrdEqual(boolT, src1, src1)); - break; - case CmpKind::U: - cmp = fragment.builder.createLogicalAnd( - boolT, fragment.builder.createFUnordNotEqual(boolT, src0, src0), - fragment.builder.createFUnordNotEqual(boolT, src1, src1)); - break; - case CmpKind::NGE: - cmp = fragment.builder.createFUnordLessThan(boolT, src0, src1); - break; - case CmpKind::NLG: - cmp = fragment.builder.createFUnordGreaterThanEqual(boolT, src0, src1); - break; - case CmpKind::NGT: - cmp = fragment.builder.createFUnordLessThanEqual(boolT, src0, src1); - break; - case CmpKind::NLE: - cmp = fragment.builder.createFUnordGreaterThan(boolT, src0, src1); - break; - case CmpKind::NE: - case CmpKind::NEQ: - if (type.isFloatPoint()) { - cmp = fragment.builder.createFUnordNotEqual(boolT, src0, src1); - } else { - cmp = fragment.builder.createINotEqual(boolT, src0, src1); - } - break; - case CmpKind::NLT: - cmp = fragment.builder.createFUnordGreaterThanEqual(boolT, src0, src1); - break; - case CmpKind::TRU: - cmp = fragment.context->getTrue(); - break; - - case CmpKind::CLASS: { - enum class FloatClass { - SNan = 0, - QNan = 1, - NInf = 2, - NNorm = 3, - NDenom = 4, - NZero = 5, - PZero = 6, - PDenom = 7, - PNorm = 8, - PInf = 9, - }; - - auto testCmpClass = [&](FloatClass fclass, - spirv::FloatValue val) -> spirv::BoolValue { - switch (fclass) { - case FloatClass::SNan: - case FloatClass::QNan: - return fragment.builder.createIsNan(boolT, val); - - case FloatClass::NInf: - return fragment.builder.createLogicalAnd( - boolT, - fragment.builder.createFOrdLessThan( - boolT, val, fragment.context->getFloat32(0)), - fragment.builder.createIsInf(boolT, val)); - - case FloatClass::NZero: - case FloatClass::PZero: - return fragment.builder.createFOrdEqual( - boolT, val, fragment.context->getFloat32(0)); - - case FloatClass::NNorm: - case FloatClass::NDenom: - case FloatClass::PDenom: - case FloatClass::PNorm: - util::unreachable(); - - case FloatClass::PInf: - return fragment.builder.createLogicalAnd( - boolT, - fragment.builder.createFOrdGreaterThan( - boolT, val, fragment.context->getFloat32(0)), - fragment.builder.createIsInf(boolT, val)); - } - - util::unreachable(); - }; - - // we cannot differ signaling and quiet nan - if (typeMask & 3) { - typeMask = (typeMask & ~3) | 2; - } - - // we cannot differ positive and negative zero - if (typeMask & 0x60) { - typeMask = (typeMask & ~0x60) | 0x40; - } - - for (int i = 0; i < 10; ++i) { - if (typeMask & (1 << i)) { - auto lhs = - testCmpClass((FloatClass)i, spirv::cast(src0)); - auto rhs = - testCmpClass((FloatClass)i, spirv::cast(src1)); - - auto bitResult = fragment.builder.createLogicalAnd(boolT, lhs, rhs); - - if (cmp) { - cmp = fragment.builder.createLogicalOr(boolT, cmp, bitResult); - } else { - cmp = bitResult; - } - } - } - - if (!cmp) { - cmp = fragment.context->getFalse(); - } - break; - } - } - - if (!cmp) { - util::unreachable(); - } - - auto uint32T = fragment.context->getUInt32Type(); - auto uint32_0 = fragment.context->getUInt32(0); - auto result = fragment.builder.createSelect( - uint32T, cmp, fragment.context->getUInt32(1), uint32_0); - - if ((flags & CmpFlags::X) == CmpFlags::X) { - fragment.setOperand(RegisterId::ExecLo, {uint32T, result}); - fragment.setOperand(RegisterId::ExecHi, {uint32T, uint32_0}); - } - - // TODO: handle flags - return {uint32T, result}; -}; - -void convertVop2(Fragment &fragment, Vop2 inst) { - fragment.registers->pc += Vop2::kMinInstSize * sizeof(std::uint32_t); - switch (inst.op) { - case Vop2::Op::V_CVT_PKRTZ_F16_F32: { - auto float2T = fragment.context->getType(TypeId::Float32x2); - auto uintT = fragment.context->getType(TypeId::UInt32); - auto glslStd450 = fragment.context->getGlslStd450(); - - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value; - - auto src = fragment.builder.createCompositeConstruct( - float2T, std::array{src0, src1}); - auto dst = fragment.builder.createExtInst( - uintT, glslStd450, GLSLstd450PackHalf2x16, std::array{src}); - - fragment.setVectorOperand(inst.vdst, {uintT, dst}); - break; - } - case Vop2::Op::V_AND_B32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createBitwiseAnd(uintT, src0, src1)}); - break; - } - - case Vop2::Op::V_OR_B32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createBitwiseOr(uintT, src0, src1)}); - break; - } - - case Vop2::Op::V_ADD_I32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(0)})}); - fragment.setVcc( - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(1)})}); - // TODO: update vcc hi - break; - } - - case Vop2::Op::V_SUB_I32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createISubBorrow(resultStruct, src0, src1); - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(0)})}); - fragment.setVcc( - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(1)})}); - // TODO: update vcc hi - break; - } - - case Vop2::Op::V_MAC_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto dst = spirv::cast( - fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFAdd( - floatT, fragment.builder.createFMul(floatT, src0, src1), dst); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MAC_LEGACY_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto dst = spirv::cast( - fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - auto float0 = fragment.context->getFloat32(0); - - auto src0IsZero = fragment.builder.createFOrdEqual(boolT, src0, float0); - auto src1IsZero = fragment.builder.createFOrdEqual(boolT, src1, float0); - auto anySrcIsZero = - fragment.builder.createLogicalOr(boolT, src0IsZero, src1IsZero); - - auto result = fragment.builder.createFAdd( - floatT, - fragment.builder.createSelect( - floatT, anySrcIsZero, float0, - fragment.builder.createFMul(floatT, src0, src1)), - dst); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MUL_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFMul(floatT, src0, src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_ADD_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFAdd(floatT, src0, src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_SUB_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFSub(floatT, src0, src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop2::Op::V_SUBREV_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFSub(floatT, src1, src0); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop2::Op::V_SUBREV_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); - auto floatT = fragment.context->getSint32Type(); - - auto result = fragment.builder.createISub(floatT, src1, src0); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MIN_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, src0, src1), src0, - src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MAX_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1), - src0, src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MUL_LEGACY_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - auto float0 = fragment.context->getFloat32(0); - - auto src0IsZero = fragment.builder.createFOrdEqual(boolT, src0, float0); - auto src1IsZero = fragment.builder.createFOrdEqual(boolT, src1, float0); - auto anySrcIsZero = - fragment.builder.createLogicalOr(boolT, src0IsZero, src1IsZero); - - auto result = fragment.builder.createSelect( - floatT, anySrcIsZero, float0, - fragment.builder.createFMul(floatT, src0, src1)); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MADAK_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto constant = spirv::cast( - fragment.getScalarOperand(255, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFAdd( - floatT, fragment.builder.createFMul(floatT, src0, src1), constant); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_MADMK_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value); - auto constant = spirv::cast( - fragment.getScalarOperand(255, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFAdd( - floatT, fragment.builder.createFMul(floatT, src0, constant), src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop2::Op::V_LSHL_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createShiftLeftLogical(uintT, src0, src1)}); - break; - } - - case Vop2::Op::V_LSHLREV_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createShiftLeftLogical(uintT, src1, src0)}); - break; - } - - case Vop2::Op::V_LSHR_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createShiftRightLogical(uintT, src0, src1)}); - break; - } - - case Vop2::Op::V_LSHRREV_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value); - auto uintT = fragment.context->getType(TypeId::UInt32); - - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createShiftRightLogical(uintT, src1, src0)}); - break; - } - - case Vop2::Op::V_ASHR_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); - auto sintT = fragment.context->getType(TypeId::SInt32); - - fragment.setVectorOperand( - inst.vdst, {sintT, fragment.builder.createShiftRightArithmetic( - sintT, src0, src1)}); - break; - } - - case Vop2::Op::V_ASHRREV_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getVectorOperand(inst.vsrc1, TypeId::SInt32).value); - auto sintT = fragment.context->getType(TypeId::SInt32); - - fragment.setVectorOperand( - inst.vdst, {sintT, fragment.builder.createShiftRightArithmetic( - sintT, src1, src0)}); - break; - } - - case Vop2::Op::V_CNDMASK_B32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - auto vcc = fragment.getVccLo(); - - auto cmp = fragment.builder.createINotEqual(fragment.context->getBoolType(), - vcc.value, - fragment.context->getUInt32(0)); - - auto uint32T = fragment.context->getUInt32Type(); - auto result = fragment.builder.createSelect(uint32T, cmp, src1, src0); - fragment.setVectorOperand(inst.vdst, {uint32T, result}); - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} -void convertSop2(Fragment &fragment, Sop2 inst) { - fragment.registers->pc += Sop2::kMinInstSize * sizeof(std::uint32_t); - auto &builder = fragment.builder; - auto context = fragment.context; - auto sCarry = [&](spirv::SIntValue a, spirv::SIntValue b, - spirv::SIntValue result) { - auto boolT = context->getBoolType(); - auto uint32T = context->getUInt32Type(); - auto s0 = context->getSInt32(0); - auto u1 = context->getUInt32(1); - auto u0 = context->getUInt32(0); - auto aLtZero = builder.createSelect( - uint32T, builder.createSLessThan(boolT, a, s0), u1, u0); - auto bLtZero = builder.createSelect( - uint32T, builder.createSLessThan(boolT, b, s0), u1, u0); - auto resultLtZero = builder.createSelect( - uint32T, builder.createSLessThan(boolT, result, s0), u1, u0); - - auto argsSignEq = builder.createIEqual(boolT, aLtZero, bLtZero); - auto resSignNe = builder.createINotEqual(boolT, resultLtZero, aLtZero); - return Value{boolT, builder.createLogicalAnd(boolT, argsSignEq, resSignNe)}; - }; - - switch (inst.op) { - case Sop2::Op::S_ADDC_U32: { - auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value; - auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - auto scc = fragment.getScc(); - - auto src0Value = fragment.context->findUint32Value(src0); - auto src1Value = fragment.context->findUint32Value(src1); - - if (src0Value && src1Value && - (scc == context->getTrue() || scc == context->getFalse())) { - std::uint64_t result = *src0Value; - result += *src1Value; - result += (scc == context->getTrue() ? 1 : 0); - - std::fprintf(stderr, "saddc result: %lx\n", result); - - fragment.setScalarOperand(inst.sdst, - {uintT, fragment.context->getUInt32(result)}); - fragment.setScc( - {uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 1 : 0)}); - } else { - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto tmpResult = - fragment.builder.createIAddCarry(resultStruct, src0, src1); - auto tmpVal = - fragment.builder.createCompositeExtract(uintT, tmpResult, {{0u}}); - auto tmpCarry = - fragment.builder.createCompositeExtract(uintT, tmpResult, {{1u}}); - auto result = fragment.builder.createIAddCarry(resultStruct, tmpVal, scc); - - auto value = - fragment.builder.createCompositeExtract(uintT, result, {{0u}}); - auto carry = - fragment.builder.createCompositeExtract(uintT, result, {{1u}}); - - fragment.setScalarOperand(inst.sdst, {uintT, value}); - fragment.setScc({uintT, builder.createBitwiseOr(uintT, tmpCarry, carry)}); - } - break; - } - case Sop2::Op::S_ADD_U32: { - auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value; - auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - - auto src0Value = fragment.context->findUint32Value(src0); - auto src1Value = fragment.context->findUint32Value(src1); - if (src0Value && src1Value) { - std::uint64_t result = *src0Value; - result += *src1Value; - - std::fprintf(stderr, "sadd result: %lx\n", result); - - fragment.setScalarOperand(inst.sdst, - {uintT, fragment.context->getUInt32(result)}); - fragment.setScc( - {uintT, fragment.context->getUInt32(result > 0xffff'ffff ? 1 : 0)}); - } else { - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); - fragment.setScalarOperand( - inst.sdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(0)}})}); - fragment.setScc( - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(1)}})}); - } - break; - } - case Sop2::Op::S_ADD_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value); - auto resultT = fragment.context->getSint32Type(); - auto result = fragment.builder.createIAdd(resultT, src0, src1); - fragment.setScc(sCarry(src0, src1, result)); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_SUB_U32: { - auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value; - auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createISubBorrow(resultStruct, src0, src1); - fragment.setScalarOperand( - inst.sdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(0)}})}); - fragment.setScc( - {uintT, fragment.builder.createCompositeExtract( - uintT, result, {{static_cast(1)}})}); - break; - } - case Sop2::Op::S_SUB_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value); - auto resultT = fragment.context->getSint32Type(); - auto result = fragment.builder.createISub(resultT, src0, src1); - fragment.setScc(sCarry(src0, src1, result)); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_ASHR_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getSint32Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x3f))); - - auto result = - fragment.builder.createShiftRightArithmetic(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_ASHR_I64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::SInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getSint64Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x3f))); - - auto result = - fragment.builder.createShiftRightArithmetic(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_LSHR_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getUInt32Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x1f))); - - auto result = fragment.builder.createShiftRightLogical(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_LSHR_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getUInt64Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x3f))); - - auto result = fragment.builder.createShiftRightLogical(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_LSHL_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getUInt32Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x1f))); - - auto result = fragment.builder.createShiftLeftLogical(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_LSHL_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - - auto resultT = fragment.context->getUInt64Type(); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - resultT, src1, fragment.context->getUInt32(0x3f))); - - auto result = fragment.builder.createShiftLeftLogical(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_CSELECT_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto resultT = fragment.context->getUInt32Type(); - auto result = - fragment.builder.createSelect(resultT, fragment.getScc(), src0, src1); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_CSELECT_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - - auto resultT = fragment.context->getUInt64Type(); - auto result = - fragment.builder.createSelect(resultT, fragment.getScc(), src0, src1); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_MUL_I32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value); - auto resultT = fragment.context->getSint32Type(); - auto result = fragment.builder.createIMul(resultT, src0, src1); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_AND_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createBitwiseAnd(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_ANDN2_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createBitwiseAnd( - resultT, src0, fragment.builder.createNot(resultT, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_AND_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - auto resultT = fragment.context->getUInt64Type(); - auto result = fragment.builder.createBitwiseAnd(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_ANDN2_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - auto resultT = fragment.context->getUInt64Type(); - auto result = fragment.builder.createBitwiseAnd( - resultT, src0, fragment.builder.createNot(resultT, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_OR_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createBitwiseOr(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_OR_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - auto resultT = fragment.context->getUInt64Type(); - auto result = fragment.builder.createBitwiseOr(resultT, src0, src1); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_NAND_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createNot( - resultT, fragment.builder.createBitwiseAnd(resultT, src0, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_NAND_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - auto resultT = fragment.context->getUInt64Type(); - auto result = fragment.builder.createNot( - resultT, fragment.builder.createBitwiseAnd(resultT, src0, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_NOR_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createNot( - resultT, fragment.builder.createBitwiseOr(resultT, src0, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - case Sop2::Op::S_NOR_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt64).value); - auto resultT = fragment.context->getUInt64Type(); - auto result = fragment.builder.createNot( - resultT, fragment.builder.createBitwiseOr(resultT, src0, src1)); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - case Sop2::Op::S_BFE_U32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - - auto operandT = fragment.context->getUInt32Type(); - - auto offset = - spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x1f))); - auto size = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, - fragment.builder.createShiftRightLogical( - operandT, src1, fragment.context->getUInt32(16)), - fragment.context->getUInt32(0x7f))); - - auto field = - fragment.builder.createShiftRightLogical(operandT, src0, offset); - auto mask = fragment.builder.createISub( - operandT, - fragment.builder.createShiftLeftLogical( - operandT, fragment.context->getUInt32(1), size), - fragment.context->getUInt32(1)); - - auto result = fragment.builder.createBitwiseAnd(operandT, field, mask); - auto resultT = fragment.context->getUInt32Type(); - fragment.setScc({resultT, result}); - fragment.setScalarOperand(inst.sdst, {resultT, result}); - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} -void convertSopk(Fragment &fragment, Sopk inst) { - fragment.registers->pc += Sopk::kMinInstSize * sizeof(std::uint32_t); - switch (inst.op) { - case Sopk::Op::S_MOVK_I32: - fragment.setScalarOperand(inst.sdst, - {fragment.context->getSint32Type(), - fragment.context->getSInt32(inst.simm)}); - break; - default: - inst.dump(); - util::unreachable(); - } -} -void convertSmrd(Fragment &fragment, Smrd inst) { - fragment.registers->pc += Smrd::kMinInstSize * sizeof(std::uint32_t); - - auto getOffset = [&](std::int32_t adv = 0) -> spirv::IntValue { - if (inst.imm) { - return fragment.context->getUInt32(inst.offset + adv); - } - - auto resultT = fragment.context->getUInt32Type(); - auto resultV = fragment.getScalarOperand(inst.offset, TypeId::UInt32).value; - - if (auto constVal = fragment.context->findUint32Value(resultV)) { - return fragment.context->getUInt32(*constVal / 4 + adv); - } - - auto result = fragment.builder.createUDiv( - resultT, spirv::cast(resultV), - fragment.context->getUInt32(4)); - - if (adv != 0) { - result = fragment.builder.createIAdd(resultT, result, - fragment.context->getUInt32(adv)); - } - return result; - }; - - switch (inst.op) { - case Smrd::Op::S_BUFFER_LOAD_DWORD: - case Smrd::Op::S_BUFFER_LOAD_DWORDX2: - case Smrd::Op::S_BUFFER_LOAD_DWORDX4: - case Smrd::Op::S_BUFFER_LOAD_DWORDX8: - case Smrd::Op::S_BUFFER_LOAD_DWORDX16: { - std::uint32_t count = 1 - << (static_cast(inst.op) - - static_cast(Smrd::Op::S_BUFFER_LOAD_DWORD)); - auto vBuffer0 = - fragment.getScalarOperand((inst.sbase << 1) + 0, TypeId::UInt32); - auto vBuffer1 = - fragment.getScalarOperand((inst.sbase << 1) + 1, TypeId::UInt32); - auto vBuffer2 = - fragment.getScalarOperand((inst.sbase << 1) + 2, TypeId::UInt32); - auto vBuffer3 = - fragment.getScalarOperand((inst.sbase << 1) + 3, TypeId::UInt32); - - auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); - auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); - auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); - auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); - - if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && - optVBuffer3Value) { - std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, - *optVBuffer2Value, *optVBuffer3Value}; - auto vbuffer = reinterpret_cast(vBufferData); - // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); - - auto valueT = fragment.context->getFloat32Type(); - auto uniform = fragment.context->getOrCreateStorageBuffer( - vBufferData, TypeId::Float32); - uniform->accessOp |= AccessOp::Load; - auto storageBufferPointerType = fragment.context->getPointerType( - spv::StorageClass::StorageBuffer, TypeId::Float32); - - for (std::uint32_t i = 0; i < count; ++i) { - auto storageBufferPointerValue = fragment.builder.createAccessChain( - storageBufferPointerType, uniform->variable, - {{fragment.context->getUInt32(0), getOffset(i)}}); - - auto value = - fragment.builder.createLoad(valueT, storageBufferPointerValue); - fragment.setScalarOperand(inst.sdst + i, {valueT, value}); - } - } else { - // FIXME: implement runtime V# buffer fetching - util::unreachable(); - } - break; - } - - case Smrd::Op::S_LOAD_DWORD: - case Smrd::Op::S_LOAD_DWORDX2: - case Smrd::Op::S_LOAD_DWORDX4: - case Smrd::Op::S_LOAD_DWORDX8: - case Smrd::Op::S_LOAD_DWORDX16: { - std::uint32_t count = 1 << (static_cast(inst.op) - - static_cast(Smrd::Op::S_LOAD_DWORD)); - - auto uint32T = fragment.context->getUInt32Type(); - auto sgprLo = fragment.getScalarOperand(inst.sbase << 1, TypeId::UInt32); - auto sgprHi = - fragment.getScalarOperand((inst.sbase << 1) + 1, TypeId::UInt32); - auto optLoAddress = fragment.context->findUint32Value(sgprLo.value); - auto optHiAddress = fragment.context->findUint32Value(sgprHi.value); - - if (inst.imm && optLoAddress && optHiAddress) { - // if it is imm and address is known, read the values now - auto memory = fragment.context->getMemory(); - auto address = - *optLoAddress | (static_cast(*optHiAddress) << 32); - - fragment.context->dependencies->map(address + (inst.offset << 2), - address + (inst.offset << 2) + - sizeof(std::uint32_t) * count); - - auto data = - memory.getPointer(address + (inst.offset << 2)); - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setScalarOperand( - inst.sdst + i, {uint32T, fragment.context->getUInt32(data[i])}); - } - } else { - // FIXME: implement - // TODO: create uniform and do load from it - util::unreachable(); - } - - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} -void convertVop3(Fragment &fragment, Vop3 inst) { - fragment.registers->pc += Vop3::kMinInstSize * sizeof(std::uint32_t); - - auto applyOmod = [&](Value result) -> Value { - switch (inst.omod) { - case 1: - return {result.type, fragment.builder.createFMul( - spirv::cast(result.type), - spirv::cast(result.value), - fragment.context->getFloat32(2))}; - - case 2: - return {result.type, fragment.builder.createFMul( - spirv::cast(result.type), - spirv::cast(result.value), - fragment.context->getFloat32(4))}; - case 3: - return {result.type, fragment.builder.createFDiv( - spirv::cast(result.type), - spirv::cast(result.value), - fragment.context->getFloat32(2))}; - - default: - case 0: - return result; - } - }; - - auto applyClamp = [&](Value result) -> Value { - if (inst.clmp) { - auto glslStd450 = fragment.context->getGlslStd450(); - result.value = fragment.builder.createExtInst( - result.type, glslStd450, GLSLstd450FClamp, - {{result.value, fragment.context->getFloat32(0), - fragment.context->getFloat32(1)}}); - } - - return result; - }; - - auto getSrc = [&](int index, TypeId type) -> Value { - std::uint32_t src = - index == 0 ? inst.src0 : (index == 1 ? inst.src1 : inst.src2); - - auto result = fragment.getScalarOperand(src, type); - - if (inst.abs & (1 << index)) { - auto glslStd450 = fragment.context->getGlslStd450(); - result.value = fragment.builder.createExtInst( - result.type, glslStd450, GLSLstd450FAbs, {{result.value}}); - } - - if (inst.neg & (1 << index)) { - result.value = fragment.builder.createFNegate( - spirv::cast(result.type), - spirv::cast(result.value)); - } - - return result; - }; - - auto getSdstSrc = [&](int index, TypeId type) -> Value { - std::uint32_t src = - index == 0 ? inst.src0 : (index == 1 ? inst.src1 : inst.src2); - - auto result = fragment.getScalarOperand(src, type); - - if (inst.neg & (1 << index)) { - result.value = fragment.builder.createFNegate( - spirv::cast(result.type), - spirv::cast(result.value)); - } - - return result; - }; - - auto roundEven = [&](spirv::Type type, spirv::Value value) { - auto glslStd450 = fragment.context->getGlslStd450(); - return Value{type, fragment.builder.createExtInst( - type, glslStd450, GLSLstd450RoundEven, {{value}})}; - }; - - auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) { - auto src0 = fragment.getScalarOperand(inst.src0, type).value; - auto src1 = fragment.getScalarOperand(inst.src1, type).value; - - std::int8_t typeMask = 0; - if (kind == CmpKind::CLASS) { - auto value = fragment.context->findSint32Value( - fragment.getScalarOperand(inst.src2, type).value); - - if (!value) { - // util::unreachable(); - typeMask = 2; - } else { - typeMask = *value; - } - } - - auto result = doCmpOp(fragment, type, src0, src1, kind, flags, typeMask); - fragment.setScalarOperand(inst.vdst, result); - fragment.setScalarOperand(inst.vdst + 1, {fragment.context->getUInt32Type(), - fragment.context->getUInt32(0)}); - }; - - switch (inst.op) { - case Vop3::Op::V3_CMP_F_F32: - cmpOp(TypeId::Float32, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG); - break; - case Vop3::Op::V3_CMP_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_O_F32: - cmpOp(TypeId::Float32, CmpKind::O); - break; - case Vop3::Op::V3_CMP_U_F32: - cmpOp(TypeId::Float32, CmpKind::U); - break; - case Vop3::Op::V3_CMP_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE); - break; - case Vop3::Op::V3_CMP_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG); - break; - case Vop3::Op::V3_CMP_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT); - break; - case Vop3::Op::V3_CMP_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE); - break; - case Vop3::Op::V3_CMP_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ); - break; - case Vop3::Op::V3_CMP_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT); - break; - case Vop3::Op::V3_CMP_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU); - break; - case Vop3::Op::V3_CMPX_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::X); - break; - case Vop3::Op::V3_CMP_F_F64: - cmpOp(TypeId::Float64, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG); - break; - case Vop3::Op::V3_CMP_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_O_F64: - cmpOp(TypeId::Float64, CmpKind::O); - break; - case Vop3::Op::V3_CMP_U_F64: - cmpOp(TypeId::Float64, CmpKind::U); - break; - case Vop3::Op::V3_CMP_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE); - break; - case Vop3::Op::V3_CMP_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG); - break; - case Vop3::Op::V3_CMP_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT); - break; - case Vop3::Op::V3_CMP_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE); - break; - case Vop3::Op::V3_CMP_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ); - break; - case Vop3::Op::V3_CMP_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT); - break; - case Vop3::Op::V3_CMP_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU); - break; - case Vop3::Op::V3_CMPX_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::X); - break; - case Vop3::Op::V3_CMPS_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::S); - break; - case Vop3::Op::V3_CMPSX_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPS_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::S); - break; - case Vop3::Op::V3_CMPS_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::S); - break; - case Vop3::Op::V3_CMPSX_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::SX); - break; - case Vop3::Op::V3_CMPSX_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::SX); - break; - case Vop3::Op::V3_CMP_F_I32: - cmpOp(TypeId::SInt32, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_I32: - cmpOp(TypeId::SInt32, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_I32: - cmpOp(TypeId::SInt32, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_I32: - cmpOp(TypeId::SInt32, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_I32: - cmpOp(TypeId::SInt32, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_I32: - cmpOp(TypeId::SInt32, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_I32: - cmpOp(TypeId::SInt32, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_T_I32: - cmpOp(TypeId::SInt32, CmpKind::T); - break; - case Vop3::Op::V3_CMP_CLASS_F32: - cmpOp(TypeId::Float32, CmpKind::CLASS); - break; - case Vop3::Op::V3_CMP_LT_I16: - cmpOp(TypeId::SInt16, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_I16: - cmpOp(TypeId::SInt16, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_I16: - cmpOp(TypeId::SInt16, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_I16: - cmpOp(TypeId::SInt16, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_I16: - cmpOp(TypeId::SInt16, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_I16: - cmpOp(TypeId::SInt16, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_CLASS_F16: - cmpOp(TypeId::Float16, CmpKind::CLASS); - break; - case Vop3::Op::V3_CMPX_F_I32: - cmpOp(TypeId::SInt32, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_I32: - cmpOp(TypeId::SInt32, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_I32: - cmpOp(TypeId::SInt32, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_I32: - cmpOp(TypeId::SInt32, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_I32: - cmpOp(TypeId::SInt32, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_I32: - cmpOp(TypeId::SInt32, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_I32: - cmpOp(TypeId::SInt32, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_T_I32: - cmpOp(TypeId::SInt32, CmpKind::T, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_CLASS_F32: - cmpOp(TypeId::Float32, CmpKind::CLASS, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_I16: - cmpOp(TypeId::SInt16, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_I16: - cmpOp(TypeId::SInt16, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_I16: - cmpOp(TypeId::SInt16, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_I16: - cmpOp(TypeId::SInt16, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_I16: - cmpOp(TypeId::SInt16, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_I16: - cmpOp(TypeId::SInt16, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_CLASS_F16: - cmpOp(TypeId::Float16, CmpKind::CLASS, CmpFlags::X); - break; - case Vop3::Op::V3_CMP_F_I64: - cmpOp(TypeId::SInt64, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_I64: - cmpOp(TypeId::SInt64, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_I64: - cmpOp(TypeId::SInt64, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_I64: - cmpOp(TypeId::SInt64, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_I64: - cmpOp(TypeId::SInt64, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_I64: - cmpOp(TypeId::SInt64, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_I64: - cmpOp(TypeId::SInt64, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_T_I64: - cmpOp(TypeId::SInt64, CmpKind::T); - break; - case Vop3::Op::V3_CMP_CLASS_F64: - cmpOp(TypeId::Float64, CmpKind::CLASS); - break; - case Vop3::Op::V3_CMP_LT_U16: - cmpOp(TypeId::UInt16, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_U16: - cmpOp(TypeId::UInt16, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_U16: - cmpOp(TypeId::UInt16, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_U16: - cmpOp(TypeId::UInt16, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_U16: - cmpOp(TypeId::UInt16, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_U16: - cmpOp(TypeId::UInt16, CmpKind::GE); - break; - case Vop3::Op::V3_CMPX_F_I64: - cmpOp(TypeId::SInt64, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_I64: - cmpOp(TypeId::SInt64, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_I64: - cmpOp(TypeId::SInt64, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_I64: - cmpOp(TypeId::SInt64, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_I64: - cmpOp(TypeId::SInt64, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_I64: - cmpOp(TypeId::SInt64, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_I64: - cmpOp(TypeId::SInt64, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_T_I64: - cmpOp(TypeId::SInt64, CmpKind::T, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_CLASS_F64: - cmpOp(TypeId::Float64, CmpKind::CLASS, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_U16: - cmpOp(TypeId::UInt16, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_U16: - cmpOp(TypeId::UInt16, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_U16: - cmpOp(TypeId::UInt16, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_U16: - cmpOp(TypeId::UInt16, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_U16: - cmpOp(TypeId::UInt16, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_U16: - cmpOp(TypeId::UInt16, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMP_F_U32: - cmpOp(TypeId::UInt32, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_U32: - cmpOp(TypeId::UInt32, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_U32: - cmpOp(TypeId::UInt32, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_U32: - cmpOp(TypeId::UInt32, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_U32: - cmpOp(TypeId::UInt32, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_U32: - cmpOp(TypeId::UInt32, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_U32: - cmpOp(TypeId::UInt32, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_T_U32: - cmpOp(TypeId::UInt32, CmpKind::T); - break; - case Vop3::Op::V3_CMP_F_F16: - cmpOp(TypeId::Float16, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_F16: - cmpOp(TypeId::Float16, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_F16: - cmpOp(TypeId::Float16, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_F16: - cmpOp(TypeId::Float16, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_F16: - cmpOp(TypeId::Float16, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_LG_F16: - cmpOp(TypeId::Float16, CmpKind::LG); - break; - case Vop3::Op::V3_CMP_GE_F16: - cmpOp(TypeId::Float16, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_O_F16: - cmpOp(TypeId::Float16, CmpKind::O); - break; - case Vop3::Op::V3_CMPX_F_U32: - cmpOp(TypeId::UInt32, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_U32: - cmpOp(TypeId::UInt32, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_U32: - cmpOp(TypeId::UInt32, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_U32: - cmpOp(TypeId::UInt32, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_U32: - cmpOp(TypeId::UInt32, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_U32: - cmpOp(TypeId::UInt32, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_U32: - cmpOp(TypeId::UInt32, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_T_U32: - cmpOp(TypeId::UInt32, CmpKind::T, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_F_F16: - cmpOp(TypeId::Float16, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_F16: - cmpOp(TypeId::Float16, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_F16: - cmpOp(TypeId::Float16, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_F16: - cmpOp(TypeId::Float16, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_F16: - cmpOp(TypeId::Float16, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LG_F16: - cmpOp(TypeId::Float16, CmpKind::LG, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_F16: - cmpOp(TypeId::Float16, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_O_F16: - cmpOp(TypeId::Float16, CmpKind::O, CmpFlags::X); - break; - case Vop3::Op::V3_CMP_F_U64: - cmpOp(TypeId::UInt64, CmpKind::F); - break; - case Vop3::Op::V3_CMP_LT_U64: - cmpOp(TypeId::UInt64, CmpKind::LT); - break; - case Vop3::Op::V3_CMP_EQ_U64: - cmpOp(TypeId::UInt64, CmpKind::EQ); - break; - case Vop3::Op::V3_CMP_LE_U64: - cmpOp(TypeId::UInt64, CmpKind::LE); - break; - case Vop3::Op::V3_CMP_GT_U64: - cmpOp(TypeId::UInt64, CmpKind::GT); - break; - case Vop3::Op::V3_CMP_NE_U64: - cmpOp(TypeId::UInt64, CmpKind::NE); - break; - case Vop3::Op::V3_CMP_GE_U64: - cmpOp(TypeId::UInt64, CmpKind::GE); - break; - case Vop3::Op::V3_CMP_T_U64: - cmpOp(TypeId::UInt64, CmpKind::T); - break; - case Vop3::Op::V3_CMP_U_F16: - cmpOp(TypeId::Float16, CmpKind::U); - break; - case Vop3::Op::V3_CMP_NGE_F16: - cmpOp(TypeId::Float16, CmpKind::NGE); - break; - case Vop3::Op::V3_CMP_NLG_F16: - cmpOp(TypeId::Float16, CmpKind::NLG); - break; - case Vop3::Op::V3_CMP_NGT_F16: - cmpOp(TypeId::Float16, CmpKind::NGT); - break; - case Vop3::Op::V3_CMP_NLE_F16: - cmpOp(TypeId::Float16, CmpKind::NLE); - break; - case Vop3::Op::V3_CMP_NEQ_F16: - cmpOp(TypeId::Float16, CmpKind::NEQ); - break; - case Vop3::Op::V3_CMP_NLT_F16: - cmpOp(TypeId::Float16, CmpKind::NLT); - break; - case Vop3::Op::V3_CMP_TRU_F16: - cmpOp(TypeId::Float16, CmpKind::TRU); - break; - case Vop3::Op::V3_CMPX_F_U64: - cmpOp(TypeId::UInt64, CmpKind::F, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LT_U64: - cmpOp(TypeId::UInt64, CmpKind::LT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_EQ_U64: - cmpOp(TypeId::UInt64, CmpKind::EQ, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_LE_U64: - cmpOp(TypeId::UInt64, CmpKind::LE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GT_U64: - cmpOp(TypeId::UInt64, CmpKind::GT, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_NE_U64: - cmpOp(TypeId::UInt64, CmpKind::NE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_GE_U64: - cmpOp(TypeId::UInt64, CmpKind::GE, CmpFlags::X); - break; - case Vop3::Op::V3_CMPX_T_U64: - cmpOp(TypeId::UInt64, CmpKind::T, CmpFlags::X); - break; - - case Vop3::Op::V3_RCP_F32: { - auto src = getSrc(0, TypeId::Float32); - auto floatT = fragment.context->getFloat32Type(); - auto float1 = fragment.context->getFloat32(1); - auto resultValue = fragment.builder.createFDiv( - floatT, float1, spirv::cast(src.value)); - auto result = applyClamp(applyOmod({floatT, resultValue})); - - fragment.setVectorOperand(inst.vdst, roundEven(result.type, result.value)); - break; - } - - case Vop3::Op::V3_ADD_I32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getScalarOperand(inst.src1, TypeId::UInt32).value; - auto uintT = fragment.context->getType(TypeId::UInt32); - auto resultStruct = - fragment.context->getStructType(std::array{uintT, uintT}); - auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1); - fragment.setVectorOperand( - inst.vdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(0)})}); - fragment.setScalarOperand( - inst.sdst, - {uintT, fragment.builder.createCompositeExtract( - uintT, result, std::array{static_cast(1)})}); - fragment.setScalarOperand(inst.sdst + 1, - {uintT, fragment.context->getUInt32(0)}); - break; - } - - case Vop3::Op::V3_MOV_B32: { - auto src0 = getSrc(0, TypeId::Float32); - - fragment.setVectorOperand(inst.vdst, src0); - break; - } - - case Vop3::Op::V3_ADD_F32: { - auto floatT = fragment.context->getFloat32Type(); - auto src0 = getSrc(0, TypeId::Float32); - auto src1 = getSrc(1, TypeId::Float32); - auto resultValue = fragment.builder.createFAdd( - floatT, spirv::cast(src0.value), - spirv::cast(src1.value)); - auto result = applyClamp(applyOmod({floatT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - - case Vop3::Op::V3_SUB_F32: { - auto floatT = fragment.context->getFloat32Type(); - auto src0 = getSrc(0, TypeId::Float32); - auto src1 = getSrc(1, TypeId::Float32); - auto resultValue = fragment.builder.createFSub( - floatT, spirv::cast(src0.value), - spirv::cast(src1.value)); - auto result = applyClamp(applyOmod({floatT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - - case Vop3::Op::V3_MUL_F32: { - auto floatT = fragment.context->getFloat32Type(); - auto src0 = getSrc(0, TypeId::Float32); - auto src1 = getSrc(1, TypeId::Float32); - auto resultValue = fragment.builder.createFMul( - floatT, spirv::cast(src0.value), - spirv::cast(src1.value)); - auto result = applyClamp(applyOmod({floatT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - case Vop3::Op::V3_MUL_LO_U32: { - auto resultT = fragment.context->getUInt32Type(); - auto src0 = getSrc(0, TypeId::UInt32); - auto src1 = getSrc(1, TypeId::UInt32); - auto resultValue = fragment.builder.createIMul( - resultT, spirv::cast(src0.value), - spirv::cast(src1.value)); - auto result = applyClamp(applyOmod({resultT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - case Vop3::Op::V3_MUL_LO_I32: { - auto resultT = fragment.context->getSint32Type(); - auto src0 = getSrc(0, TypeId::SInt32); - auto src1 = getSrc(1, TypeId::SInt32); - auto resultValue = fragment.builder.createIMul( - resultT, spirv::cast(src0.value), - spirv::cast(src1.value)); - auto result = applyClamp(applyOmod({resultT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - case Vop3::Op::V3_MUL_HI_I32: { - auto resultT = fragment.context->getSint32Type(); - auto src0 = getSrc(0, TypeId::SInt32); - auto src1 = getSrc(1, TypeId::SInt32); - - auto sint64T = fragment.context->getSint64Type(); - - auto src0_64 = fragment.builder.createSConvert( - sint64T, spirv::cast(src0.value)); - auto src1_64 = fragment.builder.createSConvert( - sint64T, spirv::cast(src1.value)); - - auto resultValue64 = fragment.builder.createIMul( - sint64T, spirv::cast(src0_64), - spirv::cast(src1_64)); - - resultValue64 = fragment.builder.createShiftRightLogical( - sint64T, resultValue64, fragment.context->getUInt32(32)); - auto resultValue = fragment.builder.createSConvert(resultT, resultValue64); - auto result = applyClamp(applyOmod({resultT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - case Vop3::Op::V3_MUL_HI_U32: { - auto resultT = fragment.context->getUInt32Type(); - auto src0 = spirv::cast(getSrc(0, TypeId::UInt32).value); - auto src1 = spirv::cast(getSrc(1, TypeId::UInt32).value); - - auto uint64T = fragment.context->getUInt64Type(); - - auto src0_64 = fragment.builder.createUConvert(uint64T, src0); - auto src1_64 = fragment.builder.createUConvert(uint64T, src1); - - auto resultValue64 = fragment.builder.createIMul(uint64T, src0_64, src1_64); - - resultValue64 = fragment.builder.createShiftRightLogical( - uint64T, resultValue64, fragment.context->getUInt32(32)); - auto resultValue = fragment.builder.createUConvert(resultT, resultValue64); - auto result = applyClamp(applyOmod({resultT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - - case Vop3::Op::V3_MAC_F32: { - auto floatT = fragment.context->getFloat32Type(); - auto src0 = getSrc(0, TypeId::Float32); - auto src1 = getSrc(1, TypeId::Float32); - - auto dst = spirv::cast( // FIXME: should use src2? - fragment.getVectorOperand(inst.vdst, TypeId::Float32).value); - - auto resultValue = fragment.builder.createFAdd( - floatT, - fragment.builder.createFMul(floatT, - spirv::cast(src0.value), - spirv::cast(src1.value)), - dst); - - auto result = applyClamp(applyOmod({floatT, resultValue})); - - fragment.setVectorOperand(inst.vdst, result); - break; - } - case Vop3::Op::V3_MAD_U32_U24: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt32Type(); - - src0 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src0, fragment.context->getUInt32((1 << 24) - 1))); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32((1 << 24) - 1))); - - auto result = fragment.builder.createIAdd( - operandT, fragment.builder.createIMul(operandT, src0, src1), src2); - - fragment.setVectorOperand(inst.vdst, {operandT, result}); - break; - } - case Vop3::Op::V3_MAD_I32_I24: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::SInt32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::SInt32).value); - auto operandT = fragment.context->getSint32Type(); - - src0 = fragment.builder.createShiftLeftLogical( - operandT, src0, fragment.context->getUInt32(8)); - src0 = fragment.builder.createShiftRightArithmetic( - operandT, src0, fragment.context->getUInt32(8)); - src1 = fragment.builder.createShiftLeftLogical( - operandT, src1, fragment.context->getUInt32(8)); - src1 = fragment.builder.createShiftRightArithmetic( - operandT, src1, fragment.context->getUInt32(8)); - - auto result = fragment.builder.createIAdd( - operandT, fragment.builder.createIMul(operandT, src0, src1), src2); - - fragment.setVectorOperand(inst.vdst, {operandT, result}); - break; - } - case Vop3::Op::V3_MUL_U32_U24: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt32Type(); - - src0 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src0, fragment.context->getUInt32((1 << 24) - 1))); - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32((1 << 24) - 1))); - - auto result = fragment.builder.createIMul(operandT, src0, src1); - - fragment.setVectorOperand(inst.vdst, {operandT, result}); - break; - } - case Vop3::Op::V3_MUL_I32_I24: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::SInt32).value); - auto operandT = fragment.context->getSint32Type(); - - src0 = fragment.builder.createShiftLeftLogical( - operandT, src0, fragment.context->getUInt32(8)); - src0 = fragment.builder.createShiftRightArithmetic( - operandT, src0, fragment.context->getUInt32(8)); - src1 = fragment.builder.createShiftLeftLogical( - operandT, src1, fragment.context->getUInt32(8)); - src1 = fragment.builder.createShiftRightArithmetic( - operandT, src1, fragment.context->getUInt32(8)); - - auto result = fragment.builder.createIMul(operandT, src0, src1); - - fragment.setVectorOperand(inst.vdst, {operandT, result}); - break; - } - case Vop3::Op::V3_MAD_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto result = fragment.builder.createFAdd( - floatT, fragment.builder.createFMul(floatT, src0, src1), src2); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop3::Op::V3_MAX_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1), - src0, src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop3::Op::V3_MAX3_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto max01 = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1), - src0, src1); - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, max01, src2), - max01, src2); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop3::Op::V3_MIN_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, src0, src1), src0, - src1); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop3::Op::V3_MIN3_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto boolT = fragment.context->getBoolType(); - - auto min01 = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, src0, src1), src0, - src1); - auto result = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, min01, src2), min01, - src2); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop3::Op::V3_MED3_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::Float32).value); - auto boolT = fragment.context->getBoolType(); - auto floatT = fragment.context->getFloat32Type(); - auto glslStd450 = fragment.context->getGlslStd450(); - - auto min01 = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, src0, src1), src0, - src1); - auto max01 = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdGreaterThan(boolT, src0, src1), src0, - src1); - auto minMax011 = fragment.builder.createSelect( - floatT, fragment.builder.createFOrdLessThan(boolT, max01, src2), max01, - src2); - - auto result = fragment.builder.createExtInst( - floatT, glslStd450, GLSLstd450NMax, {{min01, minMax011}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - } - case Vop3::Op::V3_FMA_F32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::Float32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto glslStd450 = fragment.context->getGlslStd450(); - - auto result = fragment.builder.createExtInst( - floatT, glslStd450, GLSLstd450Fma, {{src0, src1, src2}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop3::Op::V3_CNDMASK_B32: { - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto src1 = fragment.getScalarOperand(inst.src1, TypeId::UInt32).value; - auto src2 = fragment.getScalarOperand(inst.src2, TypeId::UInt32).value; - - auto cmp = fragment.builder.createINotEqual( - fragment.context->getBoolType(), src2, fragment.context->getUInt32(0)); - - auto uint32T = fragment.context->getUInt32Type(); - auto result = fragment.builder.createSelect(uint32T, cmp, src1, src0); - fragment.setVectorOperand(inst.vdst, {uint32T, result}); - break; - } - - case Vop3::Op::V3_BFE_U32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.src1, TypeId::UInt32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::UInt32).value); - - auto operandT = fragment.context->getUInt32Type(); - - auto voffset = - spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x1f))); - auto vsize = - spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src2, fragment.context->getUInt32(0x1f))); - auto field = - fragment.builder.createShiftRightLogical(operandT, src0, voffset); - auto mask = fragment.builder.createISub( - operandT, - fragment.builder.createShiftLeftLogical( - operandT, fragment.context->getUInt32(1), vsize), - fragment.context->getUInt32(1)); - - auto resultT = fragment.context->getUInt32Type(); - auto result = fragment.builder.createSelect( - operandT, - fragment.builder.createIEqual(fragment.context->getBoolType(), vsize, - fragment.context->getUInt32(0)), - fragment.context->getUInt32(0), - fragment.builder.createBitwiseAnd(operandT, field, mask)); - fragment.setVectorOperand(inst.vdst, {resultT, result}); - break; - } - - case Vop3::Op::V3_CVT_PKRTZ_F16_F32: { - auto float2T = fragment.context->getType(TypeId::Float32x2); - auto uintT = fragment.context->getType(TypeId::UInt32); - auto glslStd450 = fragment.context->getGlslStd450(); - - auto src0 = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; - auto src1 = fragment.getScalarOperand(inst.src1, TypeId::Float32).value; - - auto src = fragment.builder.createCompositeConstruct( - float2T, std::array{src0, src1}); - auto dst = fragment.builder.createExtInst( - uintT, glslStd450, GLSLstd450PackHalf2x16, std::array{src}); - - fragment.setVectorOperand(inst.vdst, {uintT, dst}); - break; - } - - case Vop3::Op::V3_SAD_U32: { - auto src0 = spirv::cast(getSrc(0, TypeId::UInt32).value); - auto src1 = spirv::cast(getSrc(1, TypeId::UInt32).value); - auto src2 = spirv::cast(getSrc(2, TypeId::UInt32).value); - - auto uint32T = fragment.context->getUInt32Type(); - auto sint32T = fragment.context->getSint32Type(); - - auto diff = fragment.builder.createISub(uint32T, src0, src1); - auto sdiff = fragment.builder.createBitcast(sint32T, diff); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto sabsdiff = fragment.builder.createExtInst(sint32T, glslStd450, - GLSLstd450SAbs, {{sdiff}}); - - auto absdiff = fragment.builder.createBitcast(uint32T, sabsdiff); - auto result = fragment.builder.createIAdd(uint32T, absdiff, src2); - fragment.setVectorOperand(inst.vdst, {uint32T, result}); - break; - } - case Vop3::Op::V3_RSQ_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst( - floatT, glslStd450, GLSLstd450InverseSqrt, {{src}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertMubuf(Fragment &fragment, Mubuf inst) { - fragment.registers->pc += Mubuf::kMinInstSize * sizeof(std::uint32_t); - /* - printMubufOpcode(op); - printf(" "); - printVectorOperand(vdata, inst + instSize); - printf(", "); - printScalarOperand(srsrc << 2, inst + instSize); - printf(", "); - printScalarOperand(soffset, inst + instSize); - */ - - auto getSOffset = [&](std::int32_t adv = 0) -> spirv::UIntValue { - auto resultT = fragment.context->getUInt32Type(); - auto resultV = - fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value; - auto result = spirv::cast(resultV); - - if (adv != 0) { - if (auto constVal = fragment.context->findSint32Value(result)) { - return fragment.context->getUInt32(*constVal + adv); - } - - result = fragment.builder.createIAdd(resultT, result, - fragment.context->getUInt32(adv)); - } - - return result; - }; - - auto getVBuffer = [&] { - auto vBuffer0 = - fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); - auto vBuffer1 = - fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); - auto vBuffer2 = - fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); - auto vBuffer3 = - fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); - - auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); - auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); - auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); - auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); - - if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && - optVBuffer3Value) { - // V# buffer value is known, read the buffer now - std::array vBufferData = { - *optVBuffer0Value, *optVBuffer1Value, *optVBuffer2Value, - *optVBuffer3Value}; - - GnmVBuffer result; - std::memcpy(&result, vBufferData.data(), sizeof(result)); - return result; - } - - util::unreachable(); - }; - - auto getAddress = [&](GnmVBuffer *vbuffer) { - auto &builder = fragment.builder; - auto uint32T = fragment.context->getUInt32Type(); - - spirv::UIntValue index; - if (inst.idxen) { - index = spirv::cast( - fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); - } - - // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); - - if (vbuffer->addtid_en) { - spirv::UIntValue threadId = - builder.createLoad(uint32T, fragment.context->getThreadId()); - - if (index) { - index = builder.createIAdd(uint32T, index, threadId); - } else { - index = threadId; - } - } - - auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) - : spirv::UIntValue{}; - - if (inst.offen) { - auto off = spirv::cast( - fragment - .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), - TypeId::UInt32) - .value); - - if (offset) { - offset = builder.createIAdd(uint32T, off, offset); - } else { - offset = off; - } - } - - spirv::UIntValue address = getSOffset(); - - if (vbuffer->swizzle_en == 0) { - if (vbuffer->stride == 0 || !index) { - return address; - } - - auto offset = builder.createIMul( - uint32T, index, fragment.context->getUInt32(vbuffer->stride)); - if (address == fragment.context->getUInt32(0)) { - return offset; - } - - return builder.createIAdd(uint32T, address, offset); - } - - if (!index && !offset) { - return address; - } - - if (index && offset) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - auto offsetMsb = builder.createIMul( - uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, - builder.createIAdd(uint32T, indexMsb, offsetMsb), - indexStride)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, index_lsb, elementSize)); - - return builder.createIAdd(uint32T, address, offset_lsb); - } - - if (index) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - - return builder.createIAdd( - uint32T, address, builder.createIMul(uint32T, indexMsb, indexStride)); - } - - if (!offset) { - util::unreachable(); - } - - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto offsetMsb = - builder.createIMul(uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, builder.createIMul(uint32T, offsetMsb, indexStride)); - - return builder.createIAdd(uint32T, address, offset_lsb); - }; - - switch (inst.op) { - case Mubuf::Op::BUFFER_LOAD_FORMAT_X: - case Mubuf::Op::BUFFER_LOAD_FORMAT_XY: - case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZ: - case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZW: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mubuf::Op::BUFFER_LOAD_FORMAT_X) + 1; - - auto vbuffer = getVBuffer(); - if (vbuffer.dfmt != kSurfaceFormatInvalid) { - auto address = getAddress(&vbuffer); - - spirv::Value result[4]; - auto resultType = convertFromFormat( - result, count, fragment, reinterpret_cast(&vbuffer), - address, vbuffer.dfmt, vbuffer.nfmt); - - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]}); - } - } else { - auto floatT = fragment.context->getFloat32Type(); - auto zero = fragment.context->getFloat32(0); - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setVectorOperand(inst.vdata + i, {floatT, zero}); - } - } - break; - } - - case Mubuf::Op::BUFFER_STORE_FORMAT_X: - case Mubuf::Op::BUFFER_STORE_FORMAT_XY: - case Mubuf::Op::BUFFER_STORE_FORMAT_XYZ: - case Mubuf::Op::BUFFER_STORE_FORMAT_XYZW: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mubuf::Op::BUFFER_STORE_FORMAT_X) + - 1; - - auto vbuffer = getVBuffer(); - if (vbuffer.dfmt != kSurfaceFormatInvalid) { - auto address = getAddress(&vbuffer); - - convertToFormat(RegisterId::Vector(inst.vdata), count, fragment, - reinterpret_cast(&vbuffer), address, - vbuffer.dfmt, vbuffer.nfmt); - } - break; - } - - case Mubuf::Op::BUFFER_LOAD_UBYTE: - case Mubuf::Op::BUFFER_LOAD_USHORT: - case Mubuf::Op::BUFFER_LOAD_SSHORT: - case Mubuf::Op::BUFFER_LOAD_SBYTE: - inst.dump(); - util::unreachable(); - - case Mubuf::Op::BUFFER_LOAD_DWORD: - case Mubuf::Op::BUFFER_LOAD_DWORDX2: - case Mubuf::Op::BUFFER_LOAD_DWORDX4: - case Mubuf::Op::BUFFER_LOAD_DWORDX3: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mubuf::Op::BUFFER_LOAD_DWORD) + 1; - - auto vbuffer = getVBuffer(); - auto address = getAddress(&vbuffer); - auto loadType = fragment.context->getType(TypeId::UInt32); - auto uniform = fragment.context->getOrCreateStorageBuffer( - reinterpret_cast(&vbuffer), TypeId::UInt32); - uniform->accessOp |= AccessOp::Load; - - auto uniformPointerType = fragment.context->getPointerType( - spv::StorageClass::StorageBuffer, TypeId::UInt32); - address = - fragment.builder.createUDiv(fragment.context->getUInt32Type(), address, - fragment.context->getUInt32(4)); - - for (int i = 0; i < count; ++i) { - auto channelOffset = address; - - if (i != 0) { - channelOffset = fragment.builder.createIAdd( - fragment.context->getUInt32Type(), channelOffset, - fragment.context->getUInt32(i)); - } - - auto uniformPointerValue = fragment.builder.createAccessChain( - uniformPointerType, uniform->variable, - {{fragment.context->getUInt32(0), channelOffset}}); - - auto value = fragment.builder.createLoad(loadType, uniformPointerValue); - fragment.setVectorOperand(inst.vdata + i, {loadType, value}); - } - break; - } - - case Mubuf::Op::BUFFER_STORE_BYTE: - case Mubuf::Op::BUFFER_STORE_SHORT: - inst.dump(); - util::unreachable(); - - case Mubuf::Op::BUFFER_STORE_DWORD: - case Mubuf::Op::BUFFER_STORE_DWORDX2: - case Mubuf::Op::BUFFER_STORE_DWORDX4: - case Mubuf::Op::BUFFER_STORE_DWORDX3: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mubuf::Op::BUFFER_STORE_DWORD) + 1; - - auto vbuffer = getVBuffer(); - auto address = getAddress(&vbuffer); - auto uniform = fragment.context->getOrCreateStorageBuffer( - reinterpret_cast(&vbuffer), TypeId::UInt32); - uniform->accessOp |= AccessOp::Store; - - auto uniformPointerType = fragment.context->getPointerType( - spv::StorageClass::StorageBuffer, TypeId::UInt32); - address = - fragment.builder.createUDiv(fragment.context->getUInt32Type(), address, - fragment.context->getUInt32(4)); - - for (int i = 0; i < count; ++i) { - auto channelOffset = address; - - if (i != 0) { - channelOffset = fragment.builder.createIAdd( - fragment.context->getUInt32Type(), channelOffset, - fragment.context->getUInt32(i)); - } - - auto uniformPointerValue = fragment.builder.createAccessChain( - uniformPointerType, uniform->variable, - {{fragment.context->getUInt32(0), channelOffset}}); - - fragment.builder.createStore( - uniformPointerValue, - fragment.getVectorOperand(inst.vdata + i, TypeId::UInt32).value); - } - } - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertMtbuf(Fragment &fragment, Mtbuf inst) { - fragment.registers->pc += Mtbuf::kMinInstSize * sizeof(std::uint32_t); - - switch (inst.op) { - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_X: - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XY: - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZ: - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZW: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mtbuf::Op::TBUFFER_LOAD_FORMAT_X) + - 1; - - auto &builder = fragment.builder; - - auto vBuffer0 = - fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); - auto vBuffer1 = - fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); - auto vBuffer2 = - fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); - auto vBuffer3 = - fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); - - auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); - auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); - auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); - auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); - - if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && - optVBuffer3Value) { - // V# buffer value is known, read the buffer now - std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, - *optVBuffer2Value, *optVBuffer3Value}; - - auto vbuffer = reinterpret_cast(vBufferData); - std::fprintf(stderr, "address0: %lx\n", vbuffer->getAddress()); - auto base = spirv::cast( - fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value); - - auto uint32T = fragment.context->getUInt32Type(); - auto uint32_0 = fragment.context->getUInt32(0); - - if (inst.dfmt == kSurfaceFormatInvalid) { - util::unreachable("!! dfmt is invalid !!\n"); - - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setVectorOperand(inst.vdata + i, {uint32T, uint32_0}); - } - - return; - } - - spirv::UIntValue index; - if (inst.idxen) { - index = spirv::cast( - fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); - } - - // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); - - if (vbuffer->addtid_en) { - spirv::UIntValue threadId = - builder.createLoad(uint32T, fragment.context->getThreadId()); - - if (index) { - index = builder.createIAdd(uint32T, index, threadId); - } else { - index = threadId; - } - } - - auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) - : spirv::UIntValue{}; - - if (inst.offen) { - auto off = spirv::cast( - fragment - .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), - TypeId::UInt32) - .value); - - if (offset) { - offset = builder.createIAdd(uint32T, off, offset); - } else { - offset = off; - } - } - - spirv::UIntValue address = base; - if (vbuffer->swizzle_en == 0) { - if (vbuffer->stride != 0 && index) { - auto offset = builder.createIMul( - uint32T, index, fragment.context->getUInt32(vbuffer->stride)); - if (address == uint32_0) { - address = offset; - } else { - address = builder.createIAdd(uint32T, address, offset); - } - } - } else { - if (index && offset) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - auto offsetMsb = builder.createIMul( - uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul( - uint32T, builder.createIAdd(uint32T, indexMsb, offsetMsb), - indexStride)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, index_lsb, elementSize)); - - address = builder.createIAdd(uint32T, address, offset_lsb); - } else if (index) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - - auto indexLsb = builder.createIMul( - uint32T, index_lsb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, indexMsb, indexStride)); - - address = builder.createIAdd(uint32T, address, indexLsb); - } else if (offset) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto offsetMsb = builder.createIMul( - uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, offsetMsb, indexStride)); - - address = builder.createIAdd(uint32T, address, offset_lsb); - } - } - - spirv::Value result[4]; - auto resultType = convertFromFormat(result, count, fragment, vBufferData, - address, inst.dfmt, inst.nfmt); - - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]}); - } - break; - } else { - util::unreachable(); - } - } - - case Mtbuf::Op::TBUFFER_STORE_FORMAT_X: - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XY: - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZ: - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZW: { - std::uint32_t count = static_cast(inst.op) - - static_cast(Mtbuf::Op::TBUFFER_STORE_FORMAT_X) + - 1; - auto &builder = fragment.builder; - - auto vBuffer0 = - fragment.getScalarOperand((inst.srsrc << 2) + 0, TypeId::UInt32); - auto vBuffer1 = - fragment.getScalarOperand((inst.srsrc << 2) + 1, TypeId::UInt32); - auto vBuffer2 = - fragment.getScalarOperand((inst.srsrc << 2) + 2, TypeId::UInt32); - auto vBuffer3 = - fragment.getScalarOperand((inst.srsrc << 2) + 3, TypeId::UInt32); - - auto optVBuffer0Value = fragment.context->findUint32Value(vBuffer0.value); - auto optVBuffer1Value = fragment.context->findUint32Value(vBuffer1.value); - auto optVBuffer2Value = fragment.context->findUint32Value(vBuffer2.value); - auto optVBuffer3Value = fragment.context->findUint32Value(vBuffer3.value); - - if (optVBuffer0Value && optVBuffer1Value && optVBuffer2Value && - optVBuffer3Value) { - // V# buffer value is known, read the buffer now - std::uint32_t vBufferData[] = {*optVBuffer0Value, *optVBuffer1Value, - *optVBuffer2Value, *optVBuffer3Value}; - - auto vbuffer = reinterpret_cast(vBufferData); - // std::printf("vBuffer address = %lx\n", vbuffer->getAddress()); - - auto base = spirv::cast( - fragment.getScalarOperand(inst.soffset, TypeId::UInt32).value); - - auto uint32T = fragment.context->getUInt32Type(); - auto uint32_0 = fragment.context->getUInt32(0); - - if (inst.dfmt == kSurfaceFormatInvalid) { - util::unreachable("!! dfmt is invalid !!\n"); - - for (std::uint32_t i = 0; i < count; ++i) { - fragment.setVectorOperand(inst.vdata + i, {uint32T, uint32_0}); - } - - return; - } - - spirv::UIntValue index; - if (inst.idxen) { - index = spirv::cast( - fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value); - } - - if (vbuffer->addtid_en) { - spirv::UIntValue threadId = - builder.createLoad(uint32T, fragment.context->getThreadId()); - - if (index) { - index = builder.createIAdd(uint32T, index, threadId); - } else { - index = threadId; - } - } - - auto offset = inst.offset ? fragment.context->getUInt32(inst.offset) - : spirv::UIntValue{}; - - if (inst.offen) { - auto off = spirv::cast( - fragment - .getVectorOperand(inst.vaddr + (inst.idxen ? 1 : 0), - TypeId::UInt32) - .value); - - if (offset) { - offset = builder.createIAdd(uint32T, off, offset); - } else { - offset = off; - } - } - - spirv::UIntValue address = base; - if (vbuffer->swizzle_en == 0) { - if (vbuffer->stride != 0 && index) { - auto offset = builder.createIMul( - uint32T, index, fragment.context->getUInt32(vbuffer->stride)); - if (address == uint32_0) { - address = offset; - } else { - address = builder.createIAdd(uint32T, address, offset); - } - } - } else { - if (index && offset) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - auto offsetMsb = builder.createIMul( - uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul( - uint32T, builder.createIAdd(uint32T, indexMsb, offsetMsb), - indexStride)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, index_lsb, elementSize)); - - address = builder.createIAdd(uint32T, address, offset_lsb); - } else if (index) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto index_msb = builder.createUDiv(uint32T, index, indexStride); - auto index_lsb = builder.createUMod(uint32T, index, indexStride); - - auto indexMsb = builder.createIMul( - uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride)); - - auto indexLsb = builder.createIMul( - uint32T, index_lsb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, indexMsb, indexStride)); - - address = builder.createIAdd(uint32T, address, indexLsb); - } else if (offset) { - auto indexStride = fragment.context->getUInt32(vbuffer->index_stride); - auto elementSize = fragment.context->getUInt32(vbuffer->element_size); - auto offset_msb = builder.createUDiv(uint32T, offset, elementSize); - auto offset_lsb = builder.createUMod(uint32T, offset, elementSize); - - auto offsetMsb = builder.createIMul( - uint32T, offset_msb, - fragment.context->getUInt32(vbuffer->element_size)); - - address = builder.createIAdd( - uint32T, address, - builder.createIMul(uint32T, offsetMsb, indexStride)); - - address = builder.createIAdd(uint32T, address, offset_lsb); - } - } - - convertToFormat(RegisterId::Vector(inst.vdata), count, fragment, - vBufferData, address, inst.dfmt, inst.nfmt); - } else { - util::unreachable(); - } - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} -void convertMimg(Fragment &fragment, Mimg inst) { - fragment.registers->pc += Mimg::kMinInstSize * sizeof(std::uint32_t); - switch (inst.op) { - case Mimg::Op::IMAGE_GET_RESINFO: { - auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), - inst.r128, true, // fixme, should be any - AccessOp::None); - spirv::Value values[4]; - auto uint32T = fragment.context->getUInt32Type(); - - if (inst.dmask & 3) { - // query whd - // TODO: support other than 2D textures - auto uint32x2T = fragment.context->getUint32x2Type(); - auto lod = fragment.getScalarOperand(inst.vaddr, TypeId::UInt32); - auto sizeResult = - fragment.builder.createImageQuerySizeLod(uint32x2T, image, lod.value); - - values[0] = - fragment.builder.createCompositeExtract(uint32T, sizeResult, {{0}}); - values[1] = - fragment.builder.createCompositeExtract(uint32T, sizeResult, {{1}}); - values[2] = fragment.context->getUInt32(1); - } - - if (inst.dmask & (1 << 3)) { - // query total mip count - values[3] = fragment.builder.createImageQueryLevels(uint32T, image); - } - - for (std::size_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - fragment.setVectorOperand(inst.vdata + dstOffset++, - {uint32T, values[i]}); - } - } - break; - } - - case Mimg::Op::IMAGE_SAMPLE_LZ: { - auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), - inst.r128, true, AccessOp::Load); - auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2)); - auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value; - auto coord1 = - fragment.getVectorOperand(inst.vaddr + 1, TypeId::Float32).value; - auto coord2 = - fragment.getVectorOperand(inst.vaddr + 2, TypeId::Float32).value; - auto coords = fragment.builder.createCompositeConstruct( - fragment.context->getFloat32x3Type(), - {{coord0, coord1, coord2}}); // TODO - - auto sampledImage2dT = fragment.context->getSampledImage2DType(); - auto float4T = fragment.context->getFloat32x4Type(); - auto floatT = fragment.context->getFloat32Type(); - auto sampledImage = - fragment.builder.createSampledImage(sampledImage2dT, image, sampler); - auto value = fragment.builder.createImageSampleExplicitLod( - float4T, sampledImage, coords, spv::ImageOperandsMask::Lod, - {{fragment.context->getFloat32(0)}}); - - for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - fragment.setVectorOperand( - inst.vdata + dstOffset++, - {floatT, - fragment.builder.createCompositeExtract(floatT, value, {{i}})}); - } - } - break; - } - - case Mimg::Op::IMAGE_SAMPLE: { - auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), - inst.r128, true, AccessOp::Load); - auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2)); - auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value; - auto coord1 = - fragment.getVectorOperand(inst.vaddr + 1, TypeId::Float32).value; - auto coord2 = - fragment.getVectorOperand(inst.vaddr + 2, TypeId::Float32).value; - auto coords = fragment.builder.createCompositeConstruct( - fragment.context->getFloat32x3Type(), - {{coord0, coord1, coord2}}); // TODO - - auto sampledImage2dT = fragment.context->getSampledImage2DType(); - auto float4T = fragment.context->getFloat32x4Type(); - auto floatT = fragment.context->getFloat32Type(); - auto sampledImage = - fragment.builder.createSampledImage(sampledImage2dT, image, sampler); - auto value = fragment.builder.createImageSampleImplicitLod( - float4T, sampledImage, coords); - - for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - fragment.setVectorOperand( - inst.vdata + dstOffset++, - {floatT, - fragment.builder.createCompositeExtract(floatT, value, {{i}})}); - } - } - break; - } - - case Mimg::Op::IMAGE_STORE: - case Mimg::Op::IMAGE_STORE_MIP: { - auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), - inst.r128, false, AccessOp::Store); - auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value; - auto coord1 = - fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value; - auto coord2 = - fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value; - auto coords = fragment.builder.createCompositeConstruct( - fragment.context->getUint32x3Type(), - {{coord0, coord1, coord2}}); // TODO - - auto float4T = fragment.context->getFloat32x4Type(); - spirv::Value values[4]; - - for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - values[i] = - fragment.getVectorOperand(inst.vdata + dstOffset++, TypeId::Float32) - .value; - } else { - values[i] = fragment.context->getFloat32(0); - } - } - - auto value = fragment.builder.createCompositeConstruct(float4T, values); - fragment.builder.createImageWrite(image, coords, value); - break; - } - - case Mimg::Op::IMAGE_LOAD: - case Mimg::Op::IMAGE_LOAD_MIP: { - auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), - inst.r128, false, AccessOp::Load); - auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value; - auto coord1 = - fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value; - auto coord2 = - fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value; - auto coords = fragment.builder.createCompositeConstruct( - fragment.context->getUint32x3Type(), - {{coord0, coord1, coord2}}); // TODO - - auto float4T = fragment.context->getFloat32x4Type(); - auto floatT = fragment.context->getFloat32Type(); - - auto value = fragment.builder.createImageRead(float4T, image, coords); - - for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - fragment.setVectorOperand( - inst.vdata + dstOffset++, - {floatT, - fragment.builder.createCompositeExtract(floatT, value, {{i}})}); - } - } - break; - } - - case Mimg::Op::IMAGE_GET_LOD: { - auto intT = fragment.context->getUInt32Type(); - for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { - if (inst.dmask & (1 << i)) { - fragment.setVectorOperand(inst.vdata + dstOffset++, - {intT, fragment.context->getUInt32(0)}); - } - } - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} -void convertDs(Fragment &fragment, Ds inst) { - fragment.registers->pc += Ds::kMinInstSize * sizeof(std::uint32_t); - switch (inst.op) { - - default: - inst.dump(); - util::unreachable(); - } -} -void convertVintrp(Fragment &fragment, Vintrp inst) { - fragment.registers->pc += Vintrp::kMinInstSize * sizeof(std::uint32_t); - switch (inst.op) { - case Vintrp::Op::V_INTERP_P1_F32: - // TODO: operation should read from LDS - // TODO: accurate emulation - - // In current inaccurate emulation we just ignore phase 1 and vsrc argument - // interpolated value stored in attr# - break; - - case Vintrp::Op::V_INTERP_P2_F32: - case Vintrp::Op::V_INTERP_MOV_F32: { - // TODO: operation should read from LDS - // TODO: accurate emulation - - auto attr = fragment.getAttrOperand(inst.attr, TypeId::Float32x4); - auto channelType = fragment.context->getType(TypeId::Float32); - auto attrChan = fragment.builder.createCompositeExtract( - channelType, attr.value, - std::array{static_cast(inst.attrChan)}); - fragment.setVectorOperand(inst.vdst, {channelType, attrChan}); - break; - } - // { - // fragment.setVectorOperand( - // inst.vdst, fragment.getScalarOperand(inst.vsrc, - // TypeId::Float32x4)); - // break; - // } - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertExp(Fragment &fragment, Exp inst) { - fragment.registers->pc += Exp::kMinInstSize * sizeof(std::uint32_t); - - if (inst.en == 0) { - fragment.builder.createFunctionCall(fragment.context->getVoidType(), - fragment.context->getDiscardFn(), {}); - return; - } - - // spirv::Value value; - std::array exports; - - // TODO: handle vm - if (inst.compr) { - auto floatT = fragment.context->getType(TypeId::Float32); - auto float2T = fragment.context->getType(TypeId::Float32x2); - auto glslStd450 = fragment.context->getGlslStd450(); - - auto xyUint = fragment.getVectorOperand(inst.vsrc0, TypeId::UInt32).value; - auto zwUint = fragment.getVectorOperand(inst.vsrc1, TypeId::UInt32).value; - - auto xy = fragment.builder.createExtInst( - float2T, glslStd450, GLSLstd450UnpackHalf2x16, std::array{xyUint}); - auto zw = fragment.builder.createExtInst( - float2T, glslStd450, GLSLstd450UnpackHalf2x16, std::array{zwUint}); - exports[0] = fragment.builder.createCompositeExtract( - floatT, xy, std::array{static_cast(0)}); - exports[1] = fragment.builder.createCompositeExtract( - floatT, xy, std::array{static_cast(1)}); - exports[2] = fragment.builder.createCompositeExtract( - floatT, zw, std::array{static_cast(0)}); - exports[3] = fragment.builder.createCompositeExtract( - floatT, zw, std::array{static_cast(1)}); - // value = fragment.builder.createCompositeConstruct(type, std::array{x, y, - // z, w}); - } else { - exports[0] = fragment.getVectorOperand(inst.vsrc0, TypeId::Float32).value; - exports[1] = fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value; - exports[2] = fragment.getVectorOperand(inst.vsrc2, TypeId::Float32).value; - exports[3] = fragment.getVectorOperand(inst.vsrc3, TypeId::Float32).value; - /* - value = fragment.builder.createCompositeConstruct( - type, - std::array{ - fragment.getVectorOperand(inst.vsrc0, TypeId::Float32).value, - fragment.getVectorOperand(inst.vsrc1, TypeId::Float32).value, - fragment.getVectorOperand(inst.vsrc2, TypeId::Float32).value, - fragment.getVectorOperand(inst.vsrc3, TypeId::Float32).value}); - */ - } - - auto resultType = fragment.context->getFloat32x4Type(); - auto floatType = fragment.context->getFloat32Type(); - /* - if (inst.en != 0xf) { - auto prevValue = fragment.getExportTarget(inst.target, TypeId::Float32x4); - if (prevValue) { - for (std::uint32_t i = 0; i < 4; ++i) { - if (~inst.en & (1 << i)) { - exports[i] = fragment.builder.createCompositeExtract( - floatType, prevValue.value, {{i}}); - } - } - } - } - */ - - auto value = fragment.builder.createCompositeConstruct(resultType, exports); - fragment.setExportTarget(inst.target, {resultType, value}); -} - -void convertVop1(Fragment &fragment, Vop1 inst) { - fragment.registers->pc += Vop1::kMinInstSize * sizeof(std::uint32_t); - auto roundEven = [&](spirv::Type type, spirv::Value value) { - // auto glslStd450 = fragment.context->getGlslStd450(); - // return Value{type, fragment.builder.createExtInst( - // type, glslStd450, GLSLstd450RoundEven, - // {{value}})}; - return Value{type, value}; - }; - - switch (inst.op) { - case Vop1::Op::V_MOV_B32: - fragment.setVectorOperand( - inst.vdst, fragment.getScalarOperand(inst.src0, TypeId::UInt32, - OperandGetFlags::PreserveType)); - break; - - case Vop1::Op::V_RCP_IFLAG_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto isNotZero = fragment.builder.createFOrdNotEqual( - fragment.context->getBoolType(), src, fragment.context->getFloat32(0)); - - src = fragment.builder.createSelect( - floatT, isNotZero, src, fragment.context->getFloat32(0.0000001)); - auto float1 = fragment.context->getFloat32(1); - auto result = fragment.builder.createFDiv(floatT, float1, src); - - fragment.setVectorOperand(inst.vdst, roundEven(floatT, result)); - break; - } - case Vop1::Op::V_RCP_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto float1 = fragment.context->getFloat32(1); - auto result = fragment.builder.createFDiv(floatT, float1, src); - - fragment.setVectorOperand(inst.vdst, roundEven(floatT, result)); - break; - } - - case Vop1::Op::V_CVT_OFF_F32_I4: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto floatT = fragment.context->getFloat32Type(); - auto int32T = fragment.context->getSint32Type(); - src = spirv::cast(fragment.builder.createBitwiseAnd( - int32T, src, fragment.context->getSInt32(0b1111))); - src = fragment.builder.createISub(int32T, src, - fragment.context->getSInt32(8)); - - auto fsrc = fragment.builder.createConvertSToF(floatT, src); - auto result = fragment.builder.createFDiv(floatT, fsrc, - fragment.context->getFloat32(16)); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop1::Op::V_RSQ_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst( - floatT, glslStd450, GLSLstd450InverseSqrt, {{src}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop1::Op::V_SQRT_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Sqrt, {{src}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop1::Op::V_EXP_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Exp2, {{src}}); - - fragment.setVectorOperand(inst.vdst, roundEven(floatT, result)); - break; - } - - case Vop1::Op::V_FRACT_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Fract, {{src}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - - case Vop1::Op::V_CVT_I32_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto resultType = fragment.context->getType(TypeId::SInt32); - auto result = fragment.builder.createConvertFToS(resultType, src); - - fragment.setVectorOperand(inst.vdst, {resultType, result}); - break; - } - case Vop1::Op::V_CVT_F32_I32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); - auto resultType = fragment.context->getType(TypeId::Float32); - auto result = fragment.builder.createConvertSToF(resultType, src); - - fragment.setVectorOperand(inst.vdst, {resultType, result}); - break; - } - - case Vop1::Op::V_CVT_U32_F32: { - auto src = fragment.getScalarOperand(inst.src0, TypeId::Float32).value; - auto resultType = fragment.context->getType(TypeId::UInt32); - auto result = fragment.builder.createConvertFToU(resultType, src); - - fragment.setVectorOperand(inst.vdst, {resultType, result}); - break; - } - case Vop1::Op::V_CVT_F32_U32: { - auto src = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; - auto resultType = fragment.context->getFloat32Type(); - auto result = fragment.builder.createConvertUToF( - resultType, spirv::cast(src)); - - fragment.setVectorOperand(inst.vdst, {resultType, result}); - break; - } - case Vop1::Op::V_FLOOR_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Floor, {{src}}); - - fragment.setVectorOperand(inst.vdst, {floatT, result}); - break; - } - case Vop1::Op::V_SIN_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto constant = fragment.context->getFloat32(M_PI * 2); // 2pi - src = fragment.builder.createFMul(floatT, src, constant); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Sin, {{src}}); - - fragment.setVectorOperand(inst.vdst, roundEven(floatT, result)); - break; - } - case Vop1::Op::V_COS_F32: { - auto src = spirv::cast( - fragment.getScalarOperand(inst.src0, TypeId::Float32).value); - auto floatT = fragment.context->getFloat32Type(); - auto constant = fragment.context->getFloat32(M_PI * 2); // 2pi - src = fragment.builder.createFMul(floatT, src, constant); - - auto glslStd450 = fragment.context->getGlslStd450(); - auto result = fragment.builder.createExtInst(floatT, glslStd450, - GLSLstd450Cos, {{src}}); - - fragment.setVectorOperand(inst.vdst, roundEven(floatT, result)); - break; - } - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertVopc(Fragment &fragment, Vopc inst) { - fragment.registers->pc += Vopc::kMinInstSize * sizeof(std::uint32_t); - - auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) { - auto src0 = fragment.getScalarOperand(inst.src0, type).value; - auto src1 = fragment.getVectorOperand(inst.vsrc1, type).value; - - auto result = doCmpOp(fragment, type, src0, src1, kind, flags); - fragment.setVcc(result); - }; - - switch (inst.op) { - case Vopc::Op::V_CMP_F_F32: - cmpOp(TypeId::Float32, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT); - break; - case Vopc::Op::V_CMP_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG); - break; - case Vopc::Op::V_CMP_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE); - break; - case Vopc::Op::V_CMP_O_F32: - cmpOp(TypeId::Float32, CmpKind::O); - break; - case Vopc::Op::V_CMP_U_F32: - cmpOp(TypeId::Float32, CmpKind::U); - break; - case Vopc::Op::V_CMP_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE); - break; - case Vopc::Op::V_CMP_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG); - break; - case Vopc::Op::V_CMP_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT); - break; - case Vopc::Op::V_CMP_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE); - break; - case Vopc::Op::V_CMP_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ); - break; - case Vopc::Op::V_CMP_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT); - break; - case Vopc::Op::V_CMP_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU); - break; - case Vopc::Op::V_CMPX_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::X); - break; - case Vopc::Op::V_CMP_F_F64: - cmpOp(TypeId::Float64, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT); - break; - case Vopc::Op::V_CMP_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG); - break; - case Vopc::Op::V_CMP_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE); - break; - case Vopc::Op::V_CMP_O_F64: - cmpOp(TypeId::Float64, CmpKind::O); - break; - case Vopc::Op::V_CMP_U_F64: - cmpOp(TypeId::Float64, CmpKind::U); - break; - case Vopc::Op::V_CMP_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE); - break; - case Vopc::Op::V_CMP_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG); - break; - case Vopc::Op::V_CMP_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT); - break; - case Vopc::Op::V_CMP_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE); - break; - case Vopc::Op::V_CMP_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ); - break; - case Vopc::Op::V_CMP_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT); - break; - case Vopc::Op::V_CMP_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU); - break; - case Vopc::Op::V_CMPX_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::X); - break; - case Vopc::Op::V_CMPS_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::S); - break; - case Vopc::Op::V_CMPSX_F_F32: - cmpOp(TypeId::Float32, CmpKind::F, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LT_F32: - cmpOp(TypeId::Float32, CmpKind::LT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_EQ_F32: - cmpOp(TypeId::Float32, CmpKind::EQ, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LE_F32: - cmpOp(TypeId::Float32, CmpKind::LE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_GT_F32: - cmpOp(TypeId::Float32, CmpKind::GT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LG_F32: - cmpOp(TypeId::Float32, CmpKind::LG, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_GE_F32: - cmpOp(TypeId::Float32, CmpKind::GE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_O_F32: - cmpOp(TypeId::Float32, CmpKind::O, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_U_F32: - cmpOp(TypeId::Float32, CmpKind::U, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NGE_F32: - cmpOp(TypeId::Float32, CmpKind::NGE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLG_F32: - cmpOp(TypeId::Float32, CmpKind::NLG, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NGT_F32: - cmpOp(TypeId::Float32, CmpKind::NGT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLE_F32: - cmpOp(TypeId::Float32, CmpKind::NLE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NEQ_F32: - cmpOp(TypeId::Float32, CmpKind::NEQ, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLT_F32: - cmpOp(TypeId::Float32, CmpKind::NLT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_TRU_F32: - cmpOp(TypeId::Float32, CmpKind::TRU, CmpFlags::SX); - break; - case Vopc::Op::V_CMPS_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::S); - break; - case Vopc::Op::V_CMPS_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::S); - break; - case Vopc::Op::V_CMPSX_F_F64: - cmpOp(TypeId::Float64, CmpKind::F, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LT_F64: - cmpOp(TypeId::Float64, CmpKind::LT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_EQ_F64: - cmpOp(TypeId::Float64, CmpKind::EQ, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LE_F64: - cmpOp(TypeId::Float64, CmpKind::LE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_GT_F64: - cmpOp(TypeId::Float64, CmpKind::GT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_LG_F64: - cmpOp(TypeId::Float64, CmpKind::LG, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_GE_F64: - cmpOp(TypeId::Float64, CmpKind::GE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_O_F64: - cmpOp(TypeId::Float64, CmpKind::O, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_U_F64: - cmpOp(TypeId::Float64, CmpKind::U, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NGE_F64: - cmpOp(TypeId::Float64, CmpKind::NGE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLG_F64: - cmpOp(TypeId::Float64, CmpKind::NLG, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NGT_F64: - cmpOp(TypeId::Float64, CmpKind::NGT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLE_F64: - cmpOp(TypeId::Float64, CmpKind::NLE, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NEQ_F64: - cmpOp(TypeId::Float64, CmpKind::NEQ, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_NLT_F64: - cmpOp(TypeId::Float64, CmpKind::NLT, CmpFlags::SX); - break; - case Vopc::Op::V_CMPSX_TRU_F64: - cmpOp(TypeId::Float64, CmpKind::TRU, CmpFlags::SX); - break; - case Vopc::Op::V_CMP_F_I32: - cmpOp(TypeId::SInt32, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_I32: - cmpOp(TypeId::SInt32, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_I32: - cmpOp(TypeId::SInt32, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_I32: - cmpOp(TypeId::SInt32, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_I32: - cmpOp(TypeId::SInt32, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_I32: - cmpOp(TypeId::SInt32, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_I32: - cmpOp(TypeId::SInt32, CmpKind::GE); - break; - case Vopc::Op::V_CMP_T_I32: - cmpOp(TypeId::SInt32, CmpKind::T); - break; - // case Vopc::Op::V_CMP_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS); - // break; - case Vopc::Op::V_CMP_LT_I16: - cmpOp(TypeId::SInt16, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_I16: - cmpOp(TypeId::SInt16, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_I16: - cmpOp(TypeId::SInt16, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_I16: - cmpOp(TypeId::SInt16, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_I16: - cmpOp(TypeId::SInt16, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_I16: - cmpOp(TypeId::SInt16, CmpKind::GE); - break; - // case Vopc::Op::V_CMP_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS); - // break; - case Vopc::Op::V_CMPX_F_I32: - cmpOp(TypeId::SInt32, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_I32: - cmpOp(TypeId::SInt32, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_I32: - cmpOp(TypeId::SInt32, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_I32: - cmpOp(TypeId::SInt32, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_I32: - cmpOp(TypeId::SInt32, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_I32: - cmpOp(TypeId::SInt32, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_I32: - cmpOp(TypeId::SInt32, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_T_I32: - cmpOp(TypeId::SInt32, CmpKind::T, CmpFlags::X); - break; - // case Vopc::Op::V_CMPX_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS, - // CmpFlags::X); break; - case Vopc::Op::V_CMPX_LT_I16: - cmpOp(TypeId::SInt16, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_I16: - cmpOp(TypeId::SInt16, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_I16: - cmpOp(TypeId::SInt16, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_I16: - cmpOp(TypeId::SInt16, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_I16: - cmpOp(TypeId::SInt16, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_I16: - cmpOp(TypeId::SInt16, CmpKind::GE, CmpFlags::X); - break; - // case Vopc::Op::V_CMPX_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS, - // CmpFlags::X); break; - case Vopc::Op::V_CMP_F_I64: - cmpOp(TypeId::SInt64, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_I64: - cmpOp(TypeId::SInt64, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_I64: - cmpOp(TypeId::SInt64, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_I64: - cmpOp(TypeId::SInt64, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_I64: - cmpOp(TypeId::SInt64, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_I64: - cmpOp(TypeId::SInt64, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_I64: - cmpOp(TypeId::SInt64, CmpKind::GE); - break; - case Vopc::Op::V_CMP_T_I64: - cmpOp(TypeId::SInt64, CmpKind::T); - break; - // case Vopc::Op::V_CMP_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS); - // break; - case Vopc::Op::V_CMP_LT_U16: - cmpOp(TypeId::UInt16, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_U16: - cmpOp(TypeId::UInt16, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_U16: - cmpOp(TypeId::UInt16, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_U16: - cmpOp(TypeId::UInt16, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_U16: - cmpOp(TypeId::UInt16, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_U16: - cmpOp(TypeId::UInt16, CmpKind::GE); - break; - case Vopc::Op::V_CMPX_F_I64: - cmpOp(TypeId::SInt64, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_I64: - cmpOp(TypeId::SInt64, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_I64: - cmpOp(TypeId::SInt64, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_I64: - cmpOp(TypeId::SInt64, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_I64: - cmpOp(TypeId::SInt64, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_I64: - cmpOp(TypeId::SInt64, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_I64: - cmpOp(TypeId::SInt64, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_T_I64: - cmpOp(TypeId::SInt64, CmpKind::T, CmpFlags::X); - break; - // case Vopc::Op::V_CMPX_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS, - // CmpFlags::X); break; - case Vopc::Op::V_CMPX_LT_U16: - cmpOp(TypeId::UInt16, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_U16: - cmpOp(TypeId::UInt16, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_U16: - cmpOp(TypeId::UInt16, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_U16: - cmpOp(TypeId::UInt16, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_U16: - cmpOp(TypeId::UInt16, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_U16: - cmpOp(TypeId::UInt16, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMP_F_U32: - cmpOp(TypeId::UInt32, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_U32: - cmpOp(TypeId::UInt32, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_U32: - cmpOp(TypeId::UInt32, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_U32: - cmpOp(TypeId::UInt32, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_U32: - cmpOp(TypeId::UInt32, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_U32: - cmpOp(TypeId::UInt32, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_U32: - cmpOp(TypeId::UInt32, CmpKind::GE); - break; - case Vopc::Op::V_CMP_T_U32: - cmpOp(TypeId::UInt32, CmpKind::T); - break; - case Vopc::Op::V_CMP_F_F16: - cmpOp(TypeId::Float16, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_F16: - cmpOp(TypeId::Float16, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_F16: - cmpOp(TypeId::Float16, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_F16: - cmpOp(TypeId::Float16, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_F16: - cmpOp(TypeId::Float16, CmpKind::GT); - break; - case Vopc::Op::V_CMP_LG_F16: - cmpOp(TypeId::Float16, CmpKind::LG); - break; - case Vopc::Op::V_CMP_GE_F16: - cmpOp(TypeId::Float16, CmpKind::GE); - break; - case Vopc::Op::V_CMP_O_F16: - cmpOp(TypeId::Float16, CmpKind::O); - break; - case Vopc::Op::V_CMPX_F_U32: - cmpOp(TypeId::UInt32, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_U32: - cmpOp(TypeId::UInt32, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_U32: - cmpOp(TypeId::UInt32, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_U32: - cmpOp(TypeId::UInt32, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_U32: - cmpOp(TypeId::UInt32, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_U32: - cmpOp(TypeId::UInt32, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_U32: - cmpOp(TypeId::UInt32, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_T_U32: - cmpOp(TypeId::UInt32, CmpKind::T, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_F_F16: - cmpOp(TypeId::Float16, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_F16: - cmpOp(TypeId::Float16, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_F16: - cmpOp(TypeId::Float16, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_F16: - cmpOp(TypeId::Float16, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_F16: - cmpOp(TypeId::Float16, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LG_F16: - cmpOp(TypeId::Float16, CmpKind::LG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_F16: - cmpOp(TypeId::Float16, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_O_F16: - cmpOp(TypeId::Float16, CmpKind::O, CmpFlags::X); - break; - case Vopc::Op::V_CMP_F_U64: - cmpOp(TypeId::UInt64, CmpKind::F); - break; - case Vopc::Op::V_CMP_LT_U64: - cmpOp(TypeId::UInt64, CmpKind::LT); - break; - case Vopc::Op::V_CMP_EQ_U64: - cmpOp(TypeId::UInt64, CmpKind::EQ); - break; - case Vopc::Op::V_CMP_LE_U64: - cmpOp(TypeId::UInt64, CmpKind::LE); - break; - case Vopc::Op::V_CMP_GT_U64: - cmpOp(TypeId::UInt64, CmpKind::GT); - break; - case Vopc::Op::V_CMP_NE_U64: - cmpOp(TypeId::UInt64, CmpKind::NE); - break; - case Vopc::Op::V_CMP_GE_U64: - cmpOp(TypeId::UInt64, CmpKind::GE); - break; - case Vopc::Op::V_CMP_T_U64: - cmpOp(TypeId::UInt64, CmpKind::T); - break; - case Vopc::Op::V_CMP_U_F16: - cmpOp(TypeId::Float16, CmpKind::U); - break; - case Vopc::Op::V_CMP_NGE_F16: - cmpOp(TypeId::Float16, CmpKind::NGE); - break; - case Vopc::Op::V_CMP_NLG_F16: - cmpOp(TypeId::Float16, CmpKind::NLG); - break; - case Vopc::Op::V_CMP_NGT_F16: - cmpOp(TypeId::Float16, CmpKind::NGT); - break; - case Vopc::Op::V_CMP_NLE_F16: - cmpOp(TypeId::Float16, CmpKind::NLE); - break; - case Vopc::Op::V_CMP_NEQ_F16: - cmpOp(TypeId::Float16, CmpKind::NEQ); - break; - case Vopc::Op::V_CMP_NLT_F16: - cmpOp(TypeId::Float16, CmpKind::NLT); - break; - case Vopc::Op::V_CMP_TRU_F16: - cmpOp(TypeId::Float16, CmpKind::TRU); - break; - case Vopc::Op::V_CMPX_F_U64: - cmpOp(TypeId::UInt64, CmpKind::F, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LT_U64: - cmpOp(TypeId::UInt64, CmpKind::LT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_EQ_U64: - cmpOp(TypeId::UInt64, CmpKind::EQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_LE_U64: - cmpOp(TypeId::UInt64, CmpKind::LE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GT_U64: - cmpOp(TypeId::UInt64, CmpKind::GT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NE_U64: - cmpOp(TypeId::UInt64, CmpKind::NE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_GE_U64: - cmpOp(TypeId::UInt64, CmpKind::GE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_T_U64: - cmpOp(TypeId::UInt64, CmpKind::T, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_U_F16: - cmpOp(TypeId::Float16, CmpKind::U, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGE_F16: - cmpOp(TypeId::Float16, CmpKind::NGE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLG_F16: - cmpOp(TypeId::Float16, CmpKind::NLG, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NGT_F16: - cmpOp(TypeId::Float16, CmpKind::NGT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLE_F16: - cmpOp(TypeId::Float16, CmpKind::NLE, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NEQ_F16: - cmpOp(TypeId::Float16, CmpKind::NEQ, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_NLT_F16: - cmpOp(TypeId::Float16, CmpKind::NLT, CmpFlags::X); - break; - case Vopc::Op::V_CMPX_TRU_F16: - cmpOp(TypeId::Float16, CmpKind::TRU, CmpFlags::X); - break; - - default: - inst.dump(); - util::unreachable(); - } -} -void convertSop1(Fragment &fragment, Sop1 inst) { - fragment.registers->pc += Sop1::kMinInstSize * sizeof(std::uint32_t); - - switch (inst.op) { - case Sop1::Op::S_MOV_B32: - fragment.setScalarOperand( - inst.sdst, fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32)); - break; - - case Sop1::Op::S_MOV_B64: - fragment.setScalarOperand( - inst.sdst, fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32)); - fragment.setScalarOperand( - inst.sdst + 1, - fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32)); - break; - - case Sop1::Op::S_WQM_B32: { - // TODO: whole quad mode - break; - } - case Sop1::Op::S_WQM_B64: { - // TODO: whole quad mode - break; - } - case Sop1::Op::S_AND_SAVEEXEC_B64: { - auto execLo = fragment.getExecLo(); - auto execHi = fragment.getExecHi(); - - auto srcLo = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32); - auto srcHi = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); - - fragment.setOperand( - RegisterId::ExecLo, - {srcLo.type, fragment.builder.createBitwiseAnd(srcLo.type, srcLo.value, - execLo.value)}); - fragment.setOperand( - RegisterId::ExecHi, - {srcHi.type, fragment.builder.createBitwiseAnd(srcHi.type, srcHi.value, - execHi.value)}); - auto uint32_0 = fragment.context->getUInt32(0); - auto boolT = fragment.context->getBoolType(); - auto loIsNotZero = - fragment.builder.createINotEqual(boolT, execLo.value, uint32_0); - auto hiIsNotZero = - fragment.builder.createINotEqual(boolT, execHi.value, uint32_0); - fragment.setScc({boolT, fragment.builder.createLogicalAnd( - boolT, loIsNotZero, hiIsNotZero)}); - fragment.setScalarOperand(inst.sdst, execLo); - fragment.setScalarOperand(inst.sdst + 1, execHi); - break; - } - - case Sop1::Op::S_SETPC_B64: - if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32), - ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); - ssrc0 && ssrc1) { - auto ssrc0OptValue = fragment.context->findUint32Value(ssrc0.value); - auto ssrc1OptValue = fragment.context->findUint32Value(ssrc1.value); - - if (!ssrc0OptValue.has_value() || !ssrc1OptValue.has_value()) { - util::unreachable(); - } - - fragment.jumpAddress = - *ssrc0OptValue | (static_cast(*ssrc1OptValue) << 32); - } else { - util::unreachable(); - } - return; - - case Sop1::Op::S_GETPC_B64: { - auto pc = fragment.registers->pc; - std::fprintf(stderr, "getpc result: %lx\n", pc); - fragment.setScalarOperand(inst.sdst, {fragment.context->getUInt32Type(), - fragment.context->getUInt32(pc)}); - fragment.setScalarOperand(inst.sdst + 1, - {fragment.context->getUInt32Type(), - fragment.context->getUInt32(pc >> 32)}); - return; - } - - case Sop1::Op::S_SWAPPC_B64: { - if (auto ssrc0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32), - ssrc1 = fragment.getScalarOperand(inst.ssrc0 + 1, TypeId::UInt32); - ssrc0 && ssrc1) { - auto ssrc0OptValue = fragment.context->findUint32Value(ssrc0.value); - auto ssrc1OptValue = fragment.context->findUint32Value(ssrc1.value); - - if (!ssrc0OptValue.has_value() || !ssrc1OptValue.has_value()) { - util::unreachable(); - } - - auto pc = fragment.registers->pc; - fragment.setScalarOperand(inst.sdst, {fragment.context->getUInt32Type(), - fragment.context->getUInt32(pc)}); - fragment.setScalarOperand(inst.sdst + 1, - {fragment.context->getUInt32Type(), - fragment.context->getUInt32(pc >> 32)}); - - fragment.jumpAddress = - *ssrc0OptValue | (static_cast(*ssrc1OptValue) << 32); - } else { - inst.dump(); - util::unreachable(); - } - return; - } - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertSopc(Fragment &fragment, Sopc inst) { - fragment.registers->pc += Sopc::kMinInstSize * sizeof(std::uint32_t); - - auto cmpOp = [&](CmpKind kind, TypeId type) { - auto src0 = fragment.getScalarOperand(inst.ssrc0, type).value; - auto src1 = fragment.getScalarOperand(inst.ssrc1, type).value; - - auto result = doCmpOp(fragment, type, src0, src1, kind, CmpFlags::None); - fragment.setScc(result); - }; - - switch (inst.op) { - case Sopc::Op::S_CMP_EQ_I32: - cmpOp(CmpKind::EQ, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_LG_I32: - cmpOp(CmpKind::LG, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_GT_I32: - cmpOp(CmpKind::GT, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_GE_I32: - cmpOp(CmpKind::GE, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_LT_I32: - cmpOp(CmpKind::LT, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_LE_I32: - cmpOp(CmpKind::LE, TypeId::SInt32); - break; - case Sopc::Op::S_CMP_EQ_U32: - cmpOp(CmpKind::EQ, TypeId::UInt32); - break; - case Sopc::Op::S_CMP_LG_U32: - cmpOp(CmpKind::LG, TypeId::UInt32); - break; - case Sopc::Op::S_CMP_GT_U32: - cmpOp(CmpKind::GT, TypeId::UInt32); - break; - case Sopc::Op::S_CMP_GE_U32: - cmpOp(CmpKind::GE, TypeId::UInt32); - break; - case Sopc::Op::S_CMP_LT_U32: - cmpOp(CmpKind::LT, TypeId::UInt32); - break; - case Sopc::Op::S_CMP_LE_U32: - cmpOp(CmpKind::LE, TypeId::UInt32); - break; - - case Sopc::Op::S_BITCMP0_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt32Type(); - - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x1f))); - auto bit = fragment.builder.createBitwiseAnd( - operandT, - fragment.builder.createShiftRightLogical(operandT, src0, src1), - fragment.context->getUInt32(1)); - - auto boolT = fragment.context->getBoolType(); - fragment.setScc({boolT, fragment.builder.createIEqual( - boolT, bit, fragment.context->getUInt32(0))}); - break; - } - case Sopc::Op::S_BITCMP1_B32: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt32Type(); - - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x1f))); - auto bit = fragment.builder.createBitwiseAnd( - operandT, - fragment.builder.createShiftRightLogical(operandT, src0, src1), - fragment.context->getUInt32(1)); - - auto boolT = fragment.context->getBoolType(); - fragment.setScc({boolT, fragment.builder.createIEqual( - boolT, bit, fragment.context->getUInt32(1))}); - break; - } - case Sopc::Op::S_BITCMP0_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt64Type(); - - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x3f))); - auto bit = fragment.builder.createBitwiseAnd( - operandT, - fragment.builder.createShiftRightLogical(operandT, src0, src1), - fragment.context->getUInt64(1)); - - auto boolT = fragment.context->getBoolType(); - fragment.setScc({boolT, fragment.builder.createIEqual( - boolT, bit, fragment.context->getUInt64(0))}); - break; - } - case Sopc::Op::S_BITCMP1_B64: { - auto src0 = spirv::cast( - fragment.getScalarOperand(inst.ssrc0, TypeId::UInt64).value); - auto src1 = spirv::cast( - fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value); - auto operandT = fragment.context->getUInt64Type(); - - src1 = spirv::cast(fragment.builder.createBitwiseAnd( - operandT, src1, fragment.context->getUInt32(0x3f))); - auto bit = fragment.builder.createBitwiseAnd( - operandT, - fragment.builder.createShiftRightLogical(operandT, src0, src1), - fragment.context->getUInt64(1)); - - auto boolT = fragment.context->getBoolType(); - fragment.setScc({boolT, fragment.builder.createIEqual( - boolT, bit, fragment.context->getUInt64(1))}); - break; - } - default: - inst.dump(); - util::unreachable(); - } -} - -void convertSopp(Fragment &fragment, Sopp inst) { - fragment.registers->pc += Sopp::kMinInstSize * sizeof(std::uint32_t); - - auto createCondBranch = [&](spirv::BoolValue condition) { - fragment.branchCondition = condition; - /* - auto address = fragment.registers->pc + (inst.simm << 2); - - Fragment *ifTrueTarget = - fragment.context->getOrCreateFragment(address, 0x100); - Fragment *ifFalseTarget = - fragment.context->getOrCreateFragment(fragment.registers->pc, - 0x100); - - fragment.builder.createSelectionMerge(ifTrueTarget->entryBlockId, {}); - fragment.builder.createBranchConditional(condition, - ifTrueTarget->builder.id, ifFalseTarget->entryBlockId); - */ - }; - - switch (inst.op) { - case Sopp::Op::S_WAITCNT: - // TODO - break; - - case Sopp::Op::S_BRANCH: { - fragment.jumpAddress = fragment.registers->pc + (inst.simm << 2); - // auto address = fragment.registers->pc + (inst.simm << 2); - // Fragment *target = fragment.context->getOrCreateFragment(address, 0x100); - - // fragment.builder.createBranch(target->entryBlockId); - // fragment.terminator = FragmentTerminator::Branch; - // target->predecessors.insert(&fragment); - // fragment.successors.insert(target); - break; - } - - case Sopp::Op::S_CBRANCH_SCC0: { - createCondBranch(fragment.builder.createLogicalNot( - fragment.context->getBoolType(), fragment.getScc())); - break; - } - - case Sopp::Op::S_CBRANCH_SCC1: { - createCondBranch(fragment.getScc()); - break; - } - - case Sopp::Op::S_CBRANCH_VCCZ: { - auto loIsZero = fragment.builder.createIEqual( - fragment.context->getBoolType(), fragment.getVccLo().value, - fragment.context->getUInt32(0)); - auto hiIsZero = fragment.builder.createIEqual( - fragment.context->getBoolType(), fragment.getVccHi().value, - fragment.context->getUInt32(0)); - createCondBranch(fragment.builder.createLogicalAnd( - fragment.context->getBoolType(), loIsZero, hiIsZero)); - break; - } - - case Sopp::Op::S_CBRANCH_VCCNZ: { - auto loIsNotZero = fragment.builder.createINotEqual( - fragment.context->getBoolType(), fragment.getVccLo().value, - fragment.context->getUInt32(0)); - auto hiIsNotZero = fragment.builder.createINotEqual( - fragment.context->getBoolType(), fragment.getVccHi().value, - fragment.context->getUInt32(0)); - - createCondBranch(fragment.builder.createLogicalOr( - fragment.context->getBoolType(), loIsNotZero, hiIsNotZero)); - break; - } - - case Sopp::Op::S_CBRANCH_EXECZ: { - auto loIsZero = fragment.builder.createIEqual( - fragment.context->getBoolType(), fragment.getExecLo().value, - fragment.context->getUInt32(0)); - auto hiIsZero = fragment.builder.createIEqual( - fragment.context->getBoolType(), fragment.getExecHi().value, - fragment.context->getUInt32(0)); - createCondBranch(fragment.builder.createLogicalAnd( - fragment.context->getBoolType(), loIsZero, hiIsZero)); - break; - } - - case Sopp::Op::S_CBRANCH_EXECNZ: { - auto loIsNotZero = fragment.builder.createINotEqual( - fragment.context->getBoolType(), fragment.getExecLo().value, - fragment.context->getUInt32(0)); - auto hiIsNotZero = fragment.builder.createINotEqual( - fragment.context->getBoolType(), fragment.getExecHi().value, - fragment.context->getUInt32(0)); - - createCondBranch(fragment.builder.createLogicalOr( - fragment.context->getBoolType(), loIsNotZero, hiIsNotZero)); - break; - } - - case Sopp::Op::S_ENDPGM: - // fragment.terminator = FragmentTerminator::EndProgram; - return; - - case Sopp::Op::S_NOP: - break; - - default: - inst.dump(); - util::unreachable(); - } -} - -void convertInstruction(Fragment &fragment, Instruction inst) { - switch (inst.instClass) { - case InstructionClass::Vop2: - return convertVop2(fragment, Vop2(inst.inst)); - case InstructionClass::Sop2: - return convertSop2(fragment, Sop2(inst.inst)); - case InstructionClass::Sopk: - return convertSopk(fragment, Sopk(inst.inst)); - case InstructionClass::Smrd: - return convertSmrd(fragment, Smrd(inst.inst)); - case InstructionClass::Vop3: - return convertVop3(fragment, Vop3(inst.inst)); - case InstructionClass::Mubuf: - return convertMubuf(fragment, Mubuf(inst.inst)); - case InstructionClass::Mtbuf: - return convertMtbuf(fragment, Mtbuf(inst.inst)); - case InstructionClass::Mimg: - return convertMimg(fragment, Mimg(inst.inst)); - case InstructionClass::Ds: - return convertDs(fragment, Ds(inst.inst)); - case InstructionClass::Vintrp: - return convertVintrp(fragment, Vintrp(inst.inst)); - case InstructionClass::Exp: - return convertExp(fragment, Exp(inst.inst)); - case InstructionClass::Vop1: - return convertVop1(fragment, Vop1(inst.inst)); - case InstructionClass::Vopc: - return convertVopc(fragment, Vopc(inst.inst)); - case InstructionClass::Sop1: - return convertSop1(fragment, Sop1(inst.inst)); - case InstructionClass::Sopc: - return convertSopc(fragment, Sopc(inst.inst)); - case InstructionClass::Sopp: - return convertSopp(fragment, Sopp(inst.inst)); - - case InstructionClass::Invalid: - break; - } - - inst.dump(); - util::unreachable(); -} - -} // namespace - -void Fragment::injectValuesFromPreds() { - for (auto pred : predecessors) { - for (auto value : pred->values) { - values.insert(value); - } - - for (auto output : pred->outputs) { - outputs.insert(output); - } - } - - std::vector> predValues; - - // std::printf("injection values for bb%lx\n", registers->pc); - - // auto getRegName = [](RegisterId id) { - // if (id.isScalar()) { - // return "sgpr"; - // } - - // if (id.isVector()) { - // return "vgpr"; - // } - - // if (id.isExport()) { - // return "exp"; - // } - - // if (id.isAttr()) { - // return "attr"; - // } - - // return ""; - // }; - - auto setupRegisterValue = [&](RegisterId id) { - bool allSameValues = true; - predValues.clear(); - - spirv::Type type; - - for (auto pred : predecessors) { - Value value; - - if (type) { - value = pred->getRegister(id, type); - } else { - value = pred->getRegister(id); - type = value.type; - } - - if (allSameValues && !predValues.empty()) { - allSameValues = predValues.back().first == value.value; - } - - predValues.emplace_back(value.value, pred->builder.id); - } - - Value value; - - if (allSameValues) { - value = {type, predValues.back().first}; - // std::printf(" ** %s[%u] is value = %u\n", getRegName(id), - // id.getOffset(), - // predValues.back().first.id); - } else { - // std::printf(" ** %s[%u] is phi = { ", getRegName(id), id.getOffset()); - // for (bool isFirst = true; auto value : predValues) { - // if (isFirst) { - // isFirst = false; - // } else { - // std::printf(", "); - // } - // std::printf("%u", value.first.id); - // } - // std::printf(" }\n"); - value = {type, builder.createPhi(type, predValues)}; - } - - registers->setRegister(id, value); - }; - - for (auto id : values) { - setupRegisterValue(id); - } - for (auto id : outputs) { - setupRegisterValue(id); - } -} - -spirv::SamplerValue Fragment::createSampler(RegisterId base) { - auto sBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32); - auto sBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32); - auto sBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32); - auto sBuffer3 = getOperand(RegisterId::Raw(base + 3), TypeId::UInt32); - - auto optSBuffer0Value = context->findUint32Value(sBuffer0.value); - auto optSBuffer1Value = context->findUint32Value(sBuffer1.value); - auto optSBuffer2Value = context->findUint32Value(sBuffer2.value); - auto optSBuffer3Value = context->findUint32Value(sBuffer3.value); - - if (optSBuffer0Value && optSBuffer1Value && optSBuffer2Value && - optSBuffer3Value) { - std::uint32_t sbuffer[] = { - *optSBuffer0Value, - *optSBuffer1Value, - *optSBuffer2Value, - *optSBuffer3Value, - }; - - auto uniform = context->getOrCreateUniformConstant( - sbuffer, std::size(sbuffer), TypeId::Sampler); - return builder.createLoad(context->getSamplerType(), uniform->variable); - } else { - std::uint32_t sbuffer[] = { - 0, - 0, - 0, - 0, - }; - - auto uniform = context->getOrCreateUniformConstant( - sbuffer, std::size(sbuffer), TypeId::Sampler); - return builder.createLoad(context->getSamplerType(), uniform->variable); - } -} - -spirv::ImageValue Fragment::createImage(RegisterId base, bool r128, - bool sampled, AccessOp access) { - auto tBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32); - auto tBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32); - auto tBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32); - auto tBuffer3 = getOperand(RegisterId::Raw(base + 3), TypeId::UInt32); - - auto optTBuffer0Value = context->findUint32Value(tBuffer0.value); - auto optTBuffer1Value = context->findUint32Value(tBuffer1.value); - auto optTBuffer2Value = context->findUint32Value(tBuffer2.value); - auto optTBuffer3Value = context->findUint32Value(tBuffer3.value); - - if (!optTBuffer0Value || !optTBuffer1Value || !optTBuffer2Value || - !optTBuffer3Value) { - util::unreachable(); - } - - auto imageTypeId = sampled ? TypeId::Image2D : TypeId::StorageImage2D; - auto imageType = - sampled ? context->getImage2DType() : context->getStorageImage2DType(); - - if (r128) { - std::uint32_t sbuffer[] = { - *optTBuffer0Value, - *optTBuffer1Value, - *optTBuffer2Value, - *optTBuffer3Value, - }; - - auto uniform = context->getOrCreateUniformConstant( - sbuffer, std::size(sbuffer), imageTypeId); - uniform->accessOp |= access; - return builder.createLoad(imageType, uniform->variable); - } - - auto tBuffer4 = getOperand(RegisterId::Raw(base + 4), TypeId::UInt32); - auto tBuffer5 = getOperand(RegisterId::Raw(base + 5), TypeId::UInt32); - auto tBuffer6 = getOperand(RegisterId::Raw(base + 6), TypeId::UInt32); - auto tBuffer7 = getOperand(RegisterId::Raw(base + 7), TypeId::UInt32); - - auto optTBuffer4Value = context->findUint32Value(tBuffer4.value); - auto optTBuffer5Value = context->findUint32Value(tBuffer5.value); - auto optTBuffer6Value = context->findUint32Value(tBuffer6.value); - auto optTBuffer7Value = context->findUint32Value(tBuffer7.value); - - if (!optTBuffer4Value || !optTBuffer5Value || !optTBuffer6Value || - !optTBuffer7Value) { - util::unreachable(); - } - - std::uint32_t sbuffer[] = { - *optTBuffer0Value, *optTBuffer1Value, *optTBuffer2Value, - *optTBuffer3Value, *optTBuffer4Value, *optTBuffer5Value, - *optTBuffer6Value, *optTBuffer7Value, - }; - - auto uniform = context->getOrCreateUniformConstant( - sbuffer, std::size(sbuffer), imageTypeId); - uniform->accessOp |= access; - return builder.createLoad(imageType, uniform->variable); -} - -Value Fragment::createCompositeExtract(Value composite, std::uint32_t member) { - auto optCompositeType = context->getTypeIdOf(composite.type); - if (!optCompositeType.has_value()) { - util::unreachable(); - } - - auto compositeType = *optCompositeType; - - TypeId baseType = compositeType.getBaseType(); - std::uint32_t memberCount = compositeType.getElementsCount(); - - if (member >= memberCount) { - util::unreachable(); - } - - auto resultType = context->getType(baseType); - spirv::Value resultValue; - - if (memberCount > 4) { - // stored in array - auto row = member / 4; - auto column = member % 4; - - auto rowType = context->getType( - static_cast(static_cast(baseType) + 3)); - - auto rowValue = - builder.createCompositeExtract(rowType, composite.value, {{row}}); - resultValue = - builder.createCompositeExtract(resultType, rowValue, {{column}}); - } else { - resultValue = - builder.createCompositeExtract(resultType, composite.value, {{member}}); - } - - return {resultType, resultValue}; -} - -spirv::Value Fragment::createBitcast(spirv::Type to, spirv::Type from, - spirv::Value value) { - if (from == to) { - return value; - } - - if (from == context->getUInt8Type()) - value = builder.createUConvert(to, spirv::cast(value)); - - if (from == context->getFloat32Type()) { - if (auto origValue = context->findFloat32Value(value)) { - if (to == context->getUInt32Type()) { - return context->getUInt32(std::bit_cast(*origValue)); - } - - if (to == context->getSint32Type()) { - return context->getSInt32(std::bit_cast(*origValue)); - } - } - } else if (from == context->getUInt32Type()) { - if (auto origValue = context->findUint32Value(value)) { - if (to == context->getFloat32Type()) { - return context->getFloat32(std::bit_cast(*origValue)); - } - - if (to == context->getSint32Type()) { - return context->getSInt32(std::bit_cast(*origValue)); - } - } - } else if (from == context->getSint32Type()) { - if (auto origValue = context->findSint32Value(value)) { - if (to == context->getFloat32Type()) { - return context->getFloat32(std::bit_cast(*origValue)); - } - - if (to == context->getUInt32Type()) { - return context->getUInt32(std::bit_cast(*origValue)); - } - } - } - - if (from == context->getUInt64Type() && to == context->getUInt32Type()) { - util::unreachable(); - } - return builder.createBitcast(to, value); -} - -Value Fragment::getOperand(RegisterId id, TypeId type, OperandGetFlags flags) { - if (id == RegisterId::Scc) { - if (type != TypeId::Bool) { - util::unreachable(); - } - - return getRegister(id); - } - - auto elementsCount = type.getElementsCount(); - - if (elementsCount == 0) { - util::unreachable(); - } - - auto resultType = context->getType(type); - - auto baseTypeId = type.getBaseType(); - auto baseTypeSize = baseTypeId.getSize(); - auto registerCountPerElement = (baseTypeSize + 3) / 4; - auto registerElementsCount = elementsCount * registerCountPerElement; - - if (registerElementsCount == 1 || id.isExport() || id.isAttr()) { - if (flags == OperandGetFlags::PreserveType) { - return getRegister(id); - } else { - return getRegister(id, resultType); - } - } - - if (baseTypeSize < 4) { - util::unreachable(); - } - - auto baseType = context->getType(baseTypeId); - - if (registerCountPerElement == 1) { - std::vector members; - members.reserve(elementsCount); - spirv::Type preservedType; - - for (std::uint32_t i = 0; i < elementsCount; ++i) { - Value member; - - if (flags == OperandGetFlags::PreserveType) { - if (!preservedType) { - member = getRegister(RegisterId::Raw(id + i)); - preservedType = member.type; - } else { - member = getRegister(RegisterId::Raw(id + i), preservedType); - } - } else { - member = getRegister(RegisterId::Raw(id + i), baseType); - } - - members.push_back(member.value); - } - - return {resultType, builder.createCompositeConstruct(resultType, members)}; - } - - if (registerElementsCount != 2) { - util::unreachable(); - } - - TypeId registerType; - - switch (baseTypeId) { - case TypeId::UInt64: - registerType = TypeId::UInt32; - break; - case TypeId::SInt64: - registerType = TypeId::SInt32; - break; - case TypeId::Float64: - registerType = TypeId::Float32; - break; - - default: - util::unreachable(); - } - - if (registerCountPerElement != 2) { - util::unreachable(); - } - - auto uint64T = context->getUInt64Type(); - auto valueLo = builder.createUConvert( - uint64T, - spirv::cast(getOperand(id, TypeId::UInt32).value)); - auto valueHi = builder.createUConvert( - uint64T, spirv::cast( - getOperand(RegisterId::Raw(id + 1), TypeId::UInt32).value)); - valueHi = - builder.createShiftLeftLogical(uint64T, valueHi, context->getUInt32(32)); - auto value = builder.createBitwiseOr(uint64T, valueLo, valueHi); - - if (baseTypeId != TypeId::UInt64) { - value = createBitcast(baseType, context->getUInt64Type(), value); - } - - return {resultType, value}; -} - -void Fragment::setOperand(RegisterId id, Value value) { - if (id.isExport()) { - function->createExport(builder, id.getOffset(), value); - return; - } - - auto typeId = *context->getTypeIdOf(value.type); - auto elementsCount = typeId.getElementsCount(); - - if (elementsCount == 0) { - util::unreachable(); - } - - // if (id.isScalar()) { - // std::printf("update sgpr[%u]\n", id.getOffset()); - // } - - // TODO: handle half types - auto baseTypeId = typeId.getBaseType(); - auto baseTypeSize = baseTypeId.getSize(); - - auto registerCountPerElement = (baseTypeSize + 3) / 4; - auto registerElementsCount = elementsCount * registerCountPerElement; - - if (id == RegisterId::Scc) { - auto boolT = context->getBoolType(); - if (value.type != boolT) { - if (value.type == context->getUInt32Type()) { - if (auto imm = context->findUint32Value(value.value)) { - value.value = *imm ? context->getTrue() : context->getFalse(); - } else { - value.value = builder.createINotEqual(boolT, value.value, - context->getUInt32(0)); - } - } else if (value.type == context->getSint32Type()) { - value.value = - builder.createINotEqual(boolT, value.value, context->getSInt32(0)); - } else if (value.type == context->getUInt64Type()) { - value.value = - builder.createINotEqual(boolT, value.value, context->getUInt64(0)); - } else { - util::unreachable(); - } - - value.type = boolT; - } - - setRegister(id, value); - return; - } - - if (registerElementsCount == 1 || id.isExport() || id.isAttr()) { - setRegister(id, value); - return; - } - - if (baseTypeSize < 4) { - util::unreachable(); - } - - if (registerCountPerElement == 1) { - for (std::uint32_t i = 0; i < elementsCount; ++i) { - auto element = createCompositeExtract(value, i); - auto regId = RegisterId::Raw(id + i); - setRegister(regId, element); - } - } else { - if (elementsCount != 1 || baseTypeId != typeId) { - util::unreachable(); - } - - TypeId registerType; - - switch (baseTypeId) { - case TypeId::UInt64: - registerType = TypeId::UInt32; - break; - case TypeId::SInt64: - registerType = TypeId::SInt32; - break; - case TypeId::Float64: - registerType = TypeId::Float32; - break; - - default: - util::unreachable(); - } - - if (registerCountPerElement != 2) { - util::unreachable(); - } - - auto uint64T = context->getUInt64Type(); - auto uint64_value = spirv::cast(value.value); - if (baseTypeId != TypeId::UInt64) { - uint64_value = spirv::cast( - createBitcast(uint64T, context->getType(baseTypeId), value.value)); - } - - auto uint32T = context->getUInt32Type(); - auto valueLo = builder.createUConvert(uint32T, uint64_value); - auto valueHi = builder.createUConvert( - uint32T, builder.createShiftRightLogical(uint64T, uint64_value, - context->getUInt32(32))); - - setOperand(id, {uint32T, valueLo}); - setOperand(RegisterId::Raw(id.raw + 1), {uint32T, valueHi}); - } -} - -void Fragment::setVcc(Value value) { - // TODO: update vcc hi if needed - // TODO: update vccz - - setOperand(RegisterId::VccLo, value); - setOperand(RegisterId::VccHi, - {context->getUInt32Type(), context->getUInt32(0)}); -} - -void Fragment::setScc(Value value) { - setOperand(RegisterId::Scc, value); - - if (value.type != context->getBoolType() && - value.type != context->getUInt32Type() && - value.type != context->getSint32Type() && - value.type != context->getUInt64Type()) { - util::unreachable(); - } -} - -spirv::BoolValue Fragment::getScc() { - auto result = - getOperand(RegisterId::Scc, TypeId::Bool, OperandGetFlags::PreserveType); - - if (result.type == context->getBoolType()) { - return spirv::cast(result.value); - } - - if (result.type == context->getUInt32Type()) { - return builder.createINotEqual(context->getBoolType(), result.value, - context->getUInt32(0)); - } - if (result.type == context->getSint32Type()) { - return builder.createINotEqual(context->getBoolType(), result.value, - context->getSInt32(0)); - } - if (result.type == context->getUInt64Type()) { - return builder.createINotEqual(context->getBoolType(), result.value, - context->getUInt64(0)); - } - - util::unreachable(); -} -/* -void Fragment::createCallTo(MaterializedFunction *materialized) { - std::vector args; - args.reserve(materialized->args.size()); - - for (auto input : materialized->args) { - auto value = getOperand(input.first, input.second); - args.push_back(value.value); - } - - auto callResultType = materialized->returnType; - - auto callResult = - builder.createFunctionCall(callResultType, materialized->function, args); - if (materialized->results.empty()) { - return; - } - - if (materialized->results.size() == 1) { - setOperand(materialized->results.begin()->first, - Value(callResultType, callResult)); - return; - } - - auto resultTypePointer = context->getBuilder().createTypePointer( - spv::StorageClass::Function, callResultType); - auto resultTypeVariable = - builder.createVariable(resultTypePointer, spv::StorageClass::Function); - builder.createStore(resultTypeVariable, callResult); - - std::uint32_t member = 0; - for (auto [output, typeId] : materialized->results) { - auto pointerType = - context->getPointerType(spv::StorageClass::Function, typeId); - auto valuePointer = builder.createAccessChain( - pointerType, resultTypeVariable, {{context->getUInt32(member++)}}); - - auto elementType = context->getType(typeId); - auto elementValue = builder.createLoad(elementType, valuePointer); - setOperand(output, Value(elementType, elementValue)); - } -} -*/ -void amdgpu::shader::Fragment::convert(std::uint64_t size) { - auto ptr = context->getMemory().getPointer(registers->pc); - auto endptr = ptr + size / sizeof(std::uint32_t); - - context->dependencies->map(registers->pc, registers->pc + size); - - while (ptr < endptr) { - Instruction inst(ptr); - auto startPoint = builder.bodyRegion.getCurrentPosition(); - - std::printf("===============\n"); - inst.dump(); - std::printf("\n"); - convertInstruction(*this, inst); - - std::printf("-------------->\n"); - spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint); - - ptr += inst.size(); - } -} - -Value amdgpu::shader::Fragment::getRegister(RegisterId id) { - if (id.isScalar()) { - switch (id.getOffset()) { - case 128 ... 192: - return {context->getSint32Type(), context->getSInt32(id - 128)}; - case 193 ... 208: - return {context->getSint32Type(), - context->getSInt32(-static_cast(id - 192))}; - case 240: - return {context->getFloat32Type(), context->getFloat32(0.5f)}; - case 241: - return {context->getFloat32Type(), context->getFloat32(-0.5f)}; - case 242: - return {context->getFloat32Type(), context->getFloat32(1.0f)}; - case 243: - return {context->getFloat32Type(), context->getFloat32(-1.0f)}; - case 244: - return {context->getFloat32Type(), context->getFloat32(2.0f)}; - case 245: - return {context->getFloat32Type(), context->getFloat32(-2.0f)}; - case 246: - return {context->getFloat32Type(), context->getFloat32(4.0f)}; - case 247: - return {context->getFloat32Type(), context->getFloat32(-4.0f)}; - // case 248: - // return {context->getFloat32Type(), context->getFloat32(1 / M_PI * 2)}; - case 255: { - context->dependencies->map(registers->pc, - registers->pc + sizeof(std::uint32_t)); - auto ptr = context->getMemory().getPointer(registers->pc); - registers->pc += sizeof(std::uint32_t); - return {context->getUInt32Type(), context->getUInt32(*ptr)}; - } - } - } - - if (auto result = registers->getRegister(id)) { - return result; - } - - if (id.isExport()) { - util::unreachable(); - } - - // std::printf("creation input %u\n", id.raw); - auto result = function->createInput(id); - assert(result); - values.insert(id); - registers->setRegister(id, result); - return result; -} - -Value amdgpu::shader::Fragment::getRegister(RegisterId id, spirv::Type type) { - auto result = getRegister(id); - - if (!result) { - return result; - } - - if (type == context->getUInt64Type()) { - util::unreachable("%u is ulong\n", id.raw); - } - - return {type, createBitcast(type, result.type, result.value)}; -} - -void amdgpu::shader::Fragment::setRegister(RegisterId id, Value value) { - if (registers->getRegister(id) == value) { - return; - } - - assert(value); - - registers->setRegister(id, value); - outputs.insert(id); - // std::printf("creation output %u\n", id.raw); -} diff --git a/hw/amdgpu/shader/src/Function.cpp b/hw/amdgpu/shader/src/Function.cpp deleted file mode 100644 index 096be61..0000000 --- a/hw/amdgpu/shader/src/Function.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include "Function.hpp" -#include "ConverterContext.hpp" -#include "RegisterId.hpp" - -using namespace amdgpu::shader; - -Value Function::createInput(RegisterId id) { - auto [it, inserted] = inputs.try_emplace(id); - - if (!inserted) { - assert(it->second); - return it->second; - } - - auto offset = id.getOffset(); - - if (id.isScalar()) { - auto uint32T = context->getUInt32Type(); - - if (userSgprs.size() > offset) { - return ((it->second = {uint32T, context->getUInt32(userSgprs[offset])})); - } - - if (stage == Stage::None) { - return ((it->second = - Value{uint32T, builder.createFunctionParameter(uint32T)})); - } - - switch (id.raw) { - case RegisterId::ExecLo: - return ((it->second = {uint32T, context->getUInt32(1)})); - case RegisterId::ExecHi: - return ((it->second = {uint32T, context->getUInt32(0)})); - - case RegisterId::Scc: - return ((it->second = {context->getBoolType(), context->getFalse()})); - - default: - break; - } - - if (stage == Stage::Vertex) { - return ((it->second = {uint32T, context->getUInt32(0)})); - } else if (stage == Stage::Fragment) { - return ((it->second = {uint32T, context->getUInt32(0)})); - } else if (stage == Stage::Compute) { - std::uint32_t offsetAfterSgprs = offset - userSgprs.size(); - if (offsetAfterSgprs < 3) { - auto workgroupIdVar = context->getWorkgroupId(); - auto workgroupId = entryFragment.builder.createLoad( - context->getUint32x3Type(), workgroupIdVar); - for (uint32_t i = 0; i < 3; ++i) { - auto input = entryFragment.builder.createCompositeExtract( - uint32T, workgroupId, {{i}}); - - inputs[RegisterId::Scalar(userSgprs.size() + i)] = {uint32T, input}; - } - - return inputs[id]; - } - - return ((it->second = {uint32T, context->getUInt32(0)})); - } - - util::unreachable(); - } - - if (stage == Stage::None) { - auto float32T = context->getFloat32Type(); - return ( - (it->second = {float32T, builder.createFunctionParameter(float32T)})); - } - - if (stage == Stage::Vertex) { - if (id.isVector()) { - auto uint32T = context->getUInt32Type(); - - if (id.getOffset() == 0) { - auto input = - entryFragment.builder.createLoad(uint32T, context->getThreadId()); - - return ((it->second = {uint32T, input})); - } - - return ((it->second = {uint32T, context->getUInt32(0)})); - } - - util::unreachable("Unexpected vertex input %u. user sgprs count=%zu", - id.raw, userSgprs.size()); - } - - if (stage == Stage::Fragment) { - if (id.isAttr()) { - auto float4T = context->getFloat32x4Type(); - auto input = entryFragment.builder.createLoad( - float4T, context->getIn(id.getOffset())); - return ((it->second = {float4T, input})); - } - - if (id.isVector()) { - switch (offset) { - case 2: - case 3: - case 4: - case 5: { - auto float4T = context->getFloat32x4Type(); - auto floatT = context->getFloat32Type(); - auto fragCoord = - entryFragment.builder.createLoad(float4T, context->getFragCoord()); - return ( - (it->second = {floatT, entryFragment.builder.createCompositeExtract( - floatT, fragCoord, {{offset - 2}})})); - } - } - } - - return ((it->second = {context->getUInt32Type(), context->getUInt32(0)})); - } - - if (stage == Stage::Compute) { - if (id.isVector() && offset < 3) { - auto uint32T = context->getUInt32Type(); - auto localInvocationIdVar = context->getLocalInvocationId(); - auto localInvocationId = entryFragment.builder.createLoad( - context->getUint32x3Type(), localInvocationIdVar); - - for (uint32_t i = 0; i < 3; ++i) { - auto input = entryFragment.builder.createCompositeExtract( - uint32T, localInvocationId, {{i}}); - - inputs[RegisterId::Vector(i)] = {uint32T, input}; - } - - return inputs[id]; - } - - return ((it->second = {context->getUInt32Type(), context->getUInt32(0)})); - } - - util::unreachable(); -} - -void Function::createExport(spirv::BlockBuilder &builder, unsigned index, - Value value) { - if (stage == Stage::Vertex) { - switch (index) { - case 12: { - auto float4OutPtrT = - context->getPointerType(spv::StorageClass::Output, TypeId::Float32x4); - - auto gl_PerVertexPosition = builder.createAccessChain( - float4OutPtrT, context->getPerVertex(), {{context->getSInt32(0)}}); - - if (value.type != context->getFloat32x4Type()) { - util::unreachable(); - } - - builder.createStore(gl_PerVertexPosition, value.value); - return; - } - - case 32 ... 64: { // paramN - if (value.type != context->getFloat32x4Type()) { - util::unreachable(); - } - - builder.createStore(context->getOut(index - 32), value.value); - return; - } - } - - util::unreachable("Unexpected vartex export target %u", index); - } - - if (stage == Stage::Fragment) { - switch (index) { - case 0 ... 7: { - if (value.type != context->getFloat32x4Type()) { - util::unreachable(); - } - - builder.createStore(context->getOut(index), value.value); - return; - } - } - - util::unreachable("Unexpected fragment export target %u", index); - } - - util::unreachable(); -} - -spirv::Type Function::getResultType() { - if (exitFragment.outputs.empty()) { - return context->getVoidType(); - } - - if (exitFragment.outputs.size() == 1) { - return exitFragment.registers->getRegister(*exitFragment.outputs.begin()) - .type; - } - - std::vector members; - members.reserve(exitFragment.outputs.size()); - - for (auto id : exitFragment.outputs) { - members.push_back(exitFragment.registers->getRegister(id).type); - } - - return context->getStructType(members); -} - -spirv::FunctionType Function::getFunctionType() { - if (stage != Stage::None) { - return context->getFunctionType(getResultType(), {}); - } - - std::vector params; - params.reserve(inputs.size()); - - for (auto inp : inputs) { - params.push_back(inp.second.type); - } - - return context->getFunctionType(getResultType(), params); -} - -Fragment *Function::createDetachedFragment() { - auto result = context->createFragment(0); - result->function = this; - return result; -} - -void Function::insertReturn() { - if (exitFragment.outputs.empty()) { - exitFragment.builder.createReturn(); - return; - } - - if (exitFragment.outputs.size() == 1) { - auto value = - exitFragment.registers->getRegister(*exitFragment.outputs.begin()) - .value; - exitFragment.builder.createReturnValue(value); - return; - } - - auto resultType = getResultType(); - - auto resultTypePointer = context->getBuilder().createTypePointer( - spv::StorageClass::Function, resultType); - - auto resultVariable = entryFragment.builder.createVariable( - resultTypePointer, spv::StorageClass::Function); - - std::uint32_t member = 0; - for (auto regId : exitFragment.outputs) { - auto value = exitFragment.registers->getRegister(regId); - auto valueTypeId = context->getTypeIdOf(value.type); - - auto pointerType = - context->getPointerType(spv::StorageClass::Function, *valueTypeId); - auto valuePointer = exitFragment.builder.createAccessChain( - pointerType, resultVariable, - {{exitFragment.context->getUInt32(member++)}}); - - exitFragment.builder.createStore(valuePointer, value.value); - } - - auto resultValue = - exitFragment.builder.createLoad(resultType, resultVariable); - - exitFragment.builder.createReturnValue(resultValue); -} diff --git a/hw/amdgpu/shader/src/Instruction.cpp b/hw/amdgpu/shader/src/Instruction.cpp deleted file mode 100644 index f85f061..0000000 --- a/hw/amdgpu/shader/src/Instruction.cpp +++ /dev/null @@ -1,3161 +0,0 @@ -#include "Instruction.hpp" -#include - -namespace { -using namespace amdgpu::shader; - -int printScalarOperand(int id, const std::uint32_t *inst) { - switch (id) { - case 0 ... 103: - std::printf("sgpr[%d]", id); - return 0; - case 106: - std::printf("VCC_LO"); - return 0; - case 107: - std::printf("VCC_HI"); - return 0; - case 124: - std::printf("M0"); - return 0; - case 126: - std::printf("EXEC_LO"); - return 0; - case 127: - std::printf("EXEC_HI"); - return 0; - case 128 ... 192: - std::printf("%d", id - 128); - return 0; - case 193 ... 208: - std::printf("%d", -static_cast(id - 192)); - return 0; - case 240: - std::printf("0.5"); - return 0; - case 241: - std::printf("-0.5"); - return 0; - case 242: - std::printf("1.0"); - return 0; - case 243: - std::printf("-1.0"); - return 0; - case 244: - std::printf("2.0"); - return 0; - case 245: - std::printf("-2.0"); - return 0; - case 246: - std::printf("4.0"); - return 0; - case 247: - std::printf("-4.0"); - return 0; - case 251: - std::printf("VCCZ"); - return 0; - case 252: - std::printf("EXECZ"); - return 0; - case 253: - std::printf("SCC"); - return 0; - case 254: - std::printf("LDS_DIRECT"); - return 0; - case 255: - std::printf("%08x", *inst); - return 1; - case 256 ... 511: - std::printf("vgpr[%u]", id - 256); - return 0; - } - - std::printf("", id); - return 0; -} - -int printVectorOperand(int id, const std::uint32_t *inst) { - std::printf("vgpr[%u]", id); - return 0; -} - -void printExpTarget(int target) { - switch (target) { - case 0 ... 7: - std::printf("mrt%u", target); - break; - case 8: - std::printf("mrtz"); - break; - case 9: - std::printf("null"); - break; - case 12 ... 15: - std::printf("pos%u", target - 12); - break; - case 32 ... 63: - std::printf("param%u", target - 32); - break; - - default: - std::printf("", target); - break; - } -} - -void printSop1Opcode(Sop1::Op op) { - if (auto string = sop1OpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printSop2Opcode(Sop2::Op op) { - if (auto string = sop2OpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printSopkOpcode(Sopk::Op op) { - if (auto string = sopkOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printSopcOpcode(Sopc::Op op) { - if (auto string = sopcOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printSoppOpcode(Sopp::Op op) { - if (auto string = soppOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printVop2Opcode(Vop2::Op op) { - if (auto string = vop2OpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printVop1Opcode(Vop1::Op op) { - if (auto string = vop1OpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printVopcOpcode(Vopc::Op op) { - if (auto string = vopcOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printVop3Opcode(Vop3::Op op) { - if (auto string = vop3OpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printSmrdOpcode(Smrd::Op op) { - if (auto string = smrdOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printMubufOpcode(Mubuf::Op op) { - if (auto string = mubufOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printMtbufOpcode(Mtbuf::Op op) { - if (auto string = mtbufOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printMimgOpcode(Mimg::Op op) { - if (auto string = mimgOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printDsOpcode(Ds::Op op) { - if (auto string = dsOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} - -void printVintrpOpcode(Vintrp::Op op) { - if (auto string = vintrpOpcodeToString(op)) { - std::printf("%s", string); - } else { - std::printf("", static_cast(op)); - } -} -} // namespace - -const char *amdgpu::shader::sop1OpcodeToString(Sop1::Op op) { - switch (op) { - case Sop1::Op::S_MOV_B32: - return "s_mov_b32"; - case Sop1::Op::S_MOV_B64: - return "s_mov_b64"; - case Sop1::Op::S_CMOV_B32: - return "s_cmov_b32"; - case Sop1::Op::S_CMOV_B64: - return "s_cmov_b64"; - case Sop1::Op::S_NOT_B32: - return "s_not_b32"; - case Sop1::Op::S_NOT_B64: - return "s_not_b64"; - case Sop1::Op::S_WQM_B32: - return "s_wqm_b32"; - case Sop1::Op::S_WQM_B64: - return "s_wqm_b64"; - case Sop1::Op::S_BREV_B32: - return "s_brev_b32"; - case Sop1::Op::S_BREV_B64: - return "s_brev_b64"; - case Sop1::Op::S_BCNT0_I32_B32: - return "s_bcnt0_i32_b32"; - case Sop1::Op::S_BCNT0_I32_B64: - return "s_bcnt0_i32_b64"; - case Sop1::Op::S_BCNT1_I32_B32: - return "s_bcnt1_i32_b32"; - case Sop1::Op::S_BCNT1_I32_B64: - return "s_bcnt1_i32_b64"; - case Sop1::Op::S_FF0_I32_B32: - return "s_ff0_i32_b32"; - case Sop1::Op::S_FF0_I32_B64: - return "s_ff0_i32_b64"; - case Sop1::Op::S_FF1_I32_B32: - return "s_ff1_i32_b32"; - case Sop1::Op::S_FF1_I32_B64: - return "s_ff1_i32_b64"; - case Sop1::Op::S_FLBIT_I32_B32: - return "s_flbit_i32_b32"; - case Sop1::Op::S_FLBIT_I32_B64: - return "s_flbit_i32_b64"; - case Sop1::Op::S_FLBIT_I32: - return "s_flbit_i32"; - case Sop1::Op::S_FLBIT_I32_I64: - return "s_flbit_i32_i64"; - case Sop1::Op::S_SEXT_I32_I8: - return "s_sext_i32_i8"; - case Sop1::Op::S_SEXT_I32_I16: - return "s_sext_i32_i16"; - case Sop1::Op::S_BITSET0_B32: - return "s_bitset0_b32"; - case Sop1::Op::S_BITSET0_B64: - return "s_bitset0_b64"; - case Sop1::Op::S_BITSET1_B32: - return "s_bitset1_b32"; - case Sop1::Op::S_BITSET1_B64: - return "s_bitset1_b64"; - case Sop1::Op::S_GETPC_B64: - return "s_getpc_b64"; - case Sop1::Op::S_SETPC_B64: - return "s_setpc_b64"; - case Sop1::Op::S_SWAPPC_B64: - return "s_swappc_b64"; - case Sop1::Op::S_RFE_B64: - return "s_rfe_b64"; - case Sop1::Op::S_AND_SAVEEXEC_B64: - return "s_and_saveexec_b64"; - case Sop1::Op::S_OR_SAVEEXEC_B64: - return "s_or_saveexec_b64"; - case Sop1::Op::S_XOR_SAVEEXEC_B64: - return "s_xor_saveexec_b64"; - case Sop1::Op::S_ANDN2_SAVEEXEC_B64: - return "s_andn2_saveexec_b64"; - case Sop1::Op::S_ORN2_SAVEEXEC_B64: - return "s_orn2_saveexec_b64"; - case Sop1::Op::S_NAND_SAVEEXEC_B64: - return "s_nand_saveexec_b64"; - case Sop1::Op::S_NOR_SAVEEXEC_B64: - return "s_nor_saveexec_b64"; - case Sop1::Op::S_XNOR_SAVEEXEC_B64: - return "s_xnor_saveexec_b64"; - case Sop1::Op::S_QUADMASK_B32: - return "s_quadmask_b32"; - case Sop1::Op::S_QUADMASK_B64: - return "s_quadmask_b64"; - case Sop1::Op::S_MOVRELS_B32: - return "s_movrels_b32"; - case Sop1::Op::S_MOVRELS_B64: - return "s_movrels_b64"; - case Sop1::Op::S_MOVRELD_B32: - return "s_movreld_b32"; - case Sop1::Op::S_MOVRELD_B64: - return "s_movreld_b64"; - case Sop1::Op::S_CBRANCH_JOIN: - return "s_cbranch_join"; - case Sop1::Op::S_ABS_I32: - return "s_abs_i32"; - case Sop1::Op::S_MOV_FED_B32: - return "s_mov_fed_b32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::sop2OpcodeToString(Sop2::Op op) { - switch (op) { - case Sop2::Op::S_ADD_U32: - return "s_add_u32"; - case Sop2::Op::S_SUB_U32: - return "s_sub_u32"; - case Sop2::Op::S_ADD_I32: - return "s_add_i32"; - case Sop2::Op::S_SUB_I32: - return "s_sub_i32"; - case Sop2::Op::S_ADDC_U32: - return "s_addc_u32"; - case Sop2::Op::S_SUBB_U32: - return "s_subb_u32"; - case Sop2::Op::S_MIN_I32: - return "s_min_i32"; - case Sop2::Op::S_MIN_U32: - return "s_min_u32"; - case Sop2::Op::S_MAX_I32: - return "s_max_i32"; - case Sop2::Op::S_MAX_U32: - return "s_max_u32"; - case Sop2::Op::S_CSELECT_B32: - return "s_cselect_b32"; - case Sop2::Op::S_CSELECT_B64: - return "s_cselect_b64"; - case Sop2::Op::S_AND_B32: - return "s_and_b32"; - case Sop2::Op::S_AND_B64: - return "s_and_b64"; - case Sop2::Op::S_OR_B32: - return "s_or_b32"; - case Sop2::Op::S_OR_B64: - return "s_or_b64"; - case Sop2::Op::S_XOR_B32: - return "s_xor_b32"; - case Sop2::Op::S_XOR_B64: - return "s_xor_b64"; - case Sop2::Op::S_ANDN2_B32: - return "s_andn2_b32"; - case Sop2::Op::S_ANDN2_B64: - return "s_andn2_b64"; - case Sop2::Op::S_ORN2_B32: - return "s_orn2_b32"; - case Sop2::Op::S_ORN2_B64: - return "s_orn2_b64"; - case Sop2::Op::S_NAND_B32: - return "s_nand_b32"; - case Sop2::Op::S_NAND_B64: - return "s_nand_b64"; - case Sop2::Op::S_NOR_B32: - return "s_nor_b32"; - case Sop2::Op::S_NOR_B64: - return "s_nor_b64"; - case Sop2::Op::S_XNOR_B32: - return "s_xnor_b32"; - case Sop2::Op::S_XNOR_B64: - return "s_xnor_b64"; - case Sop2::Op::S_LSHL_B32: - return "s_lshl_b32"; - case Sop2::Op::S_LSHL_B64: - return "s_lshl_b64"; - case Sop2::Op::S_LSHR_B32: - return "s_lshr_b32"; - case Sop2::Op::S_LSHR_B64: - return "s_lshr_b64"; - case Sop2::Op::S_ASHR_I32: - return "s_ashr_i32"; - case Sop2::Op::S_ASHR_I64: - return "s_ashr_i64"; - case Sop2::Op::S_BFM_B32: - return "s_bfm_b32"; - case Sop2::Op::S_BFM_B64: - return "s_bfm_b64"; - case Sop2::Op::S_MUL_I32: - return "s_mul_i32"; - case Sop2::Op::S_BFE_U32: - return "s_bfe_u32"; - case Sop2::Op::S_BFE_I32: - return "s_bfe_i32"; - case Sop2::Op::S_BFE_U64: - return "s_bfe_u64"; - case Sop2::Op::S_BFE_I64: - return "s_bfe_i64"; - case Sop2::Op::S_CBRANCH_G_FORK: - return "s_cbranch_g_fork"; - case Sop2::Op::S_ABSDIFF_I32: - return "s_absdiff_i32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::sopkOpcodeToString(Sopk::Op op) { - switch (op) { - case Sopk::Op::S_MOVK_I32: - return "s_movk_i32"; - case Sopk::Op::S_CMOVK_I32: - return "s_cmovk_i32"; - case Sopk::Op::S_CMPK_EQ_I32: - return "s_cmpk_eq_i32"; - case Sopk::Op::S_CMPK_LG_I32: - return "s_cmpk_lg_i32"; - case Sopk::Op::S_CMPK_GT_I32: - return "s_cmpk_gt_i32"; - case Sopk::Op::S_CMPK_GE_I32: - return "s_cmpk_ge_i32"; - case Sopk::Op::S_CMPK_LT_I32: - return "s_cmpk_lt_i32"; - case Sopk::Op::S_CMPK_LE_I32: - return "s_cmpk_le_i32"; - case Sopk::Op::S_CMPK_EQ_U32: - return "s_cmpk_eq_u32"; - case Sopk::Op::S_CMPK_LG_U32: - return "s_cmpk_lg_u32"; - case Sopk::Op::S_CMPK_GT_U32: - return "s_cmpk_gt_u32"; - case Sopk::Op::S_CMPK_GE_U32: - return "s_cmpk_ge_u32"; - case Sopk::Op::S_CMPK_LT_U32: - return "s_cmpk_lt_u32"; - case Sopk::Op::S_CMPK_LE_U32: - return "s_cmpk_le_u32"; - case Sopk::Op::S_ADDK_I32: - return "s_addk_i32"; - case Sopk::Op::S_MULK_I32: - return "s_mulk_i32"; - case Sopk::Op::S_CBRANCH_I_FORK: - return "s_cbranch_i_fork"; - case Sopk::Op::S_GETREG_B32: - return "s_getreg_b32"; - case Sopk::Op::S_SETREG_B32: - return "s_setreg_b32"; - case Sopk::Op::S_SETREG_IMM: - return "s_setreg_imm"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::sopcOpcodeToString(Sopc::Op op) { - switch (op) { - case Sopc::Op::S_CMP_EQ_I32: - return "s_cmp_eq_i32"; - case Sopc::Op::S_CMP_LG_I32: - return "s_cmp_lg_i32"; - case Sopc::Op::S_CMP_GT_I32: - return "s_cmp_gt_i32"; - case Sopc::Op::S_CMP_GE_I32: - return "s_cmp_ge_i32"; - case Sopc::Op::S_CMP_LT_I32: - return "s_cmp_lt_i32"; - case Sopc::Op::S_CMP_LE_I32: - return "s_cmp_le_i32"; - case Sopc::Op::S_CMP_EQ_U32: - return "s_cmp_eq_u32"; - case Sopc::Op::S_CMP_LG_U32: - return "s_cmp_lg_u32"; - case Sopc::Op::S_CMP_GT_U32: - return "s_cmp_gt_u32"; - case Sopc::Op::S_CMP_GE_U32: - return "s_cmp_ge_u32"; - case Sopc::Op::S_CMP_LT_U32: - return "s_cmp_lt_u32"; - case Sopc::Op::S_CMP_LE_U32: - return "s_cmp_le_u32"; - case Sopc::Op::S_BITCMP0_B32: - return "s_bitcmp0_b32"; - case Sopc::Op::S_BITCMP1_B32: - return "s_bitcmp1_b32"; - case Sopc::Op::S_BITCMP0_B64: - return "s_bitcmp0_b64"; - case Sopc::Op::S_BITCMP1_B64: - return "s_bitcmp1_b64"; - case Sopc::Op::S_SETVSKIP: - return "s_setvskip"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::soppOpcodeToString(Sopp::Op op) { - switch (op) { - case Sopp::Op::S_NOP: - return "s_nop"; - case Sopp::Op::S_ENDPGM: - return "s_endpgm"; - case Sopp::Op::S_BRANCH: - return "s_branch"; - case Sopp::Op::S_CBRANCH_SCC0: - return "s_cbranch_scc0"; - case Sopp::Op::S_CBRANCH_SCC1: - return "s_cbranch_scc1"; - case Sopp::Op::S_CBRANCH_VCCZ: - return "s_cbranch_vccz"; - case Sopp::Op::S_CBRANCH_VCCNZ: - return "s_cbranch_vccnz"; - case Sopp::Op::S_CBRANCH_EXECZ: - return "s_cbranch_execz"; - case Sopp::Op::S_CBRANCH_EXECNZ: - return "s_cbranch_execnz"; - case Sopp::Op::S_BARRIER: - return "s_barrier"; - case Sopp::Op::S_WAITCNT: - return "s_waitcnt"; - case Sopp::Op::S_SETHALT: - return "s_sethalt"; - case Sopp::Op::S_SLEEP: - return "s_sleep"; - case Sopp::Op::S_SETPRIO: - return "s_setprio"; - case Sopp::Op::S_SENDMSG: - return "s_sendmsg"; - case Sopp::Op::S_SENDMSGHALT: - return "s_sendmsghalt"; - case Sopp::Op::S_TRAP: - return "s_trap"; - case Sopp::Op::S_ICACHE_INV: - return "s_icache_inv"; - case Sopp::Op::S_INCPERFLEVEL: - return "s_incperflevel"; - case Sopp::Op::S_DECPERFLEVEL: - return "s_decperflevel"; - case Sopp::Op::S_TTRACEDATA: - return "s_ttracedata"; - case Sopp::Op::S_CBRANCH_CDBGSYS: - return "s_cbranch_cdbgsys"; - case Sopp::Op::S_CBRANCH_CDBGUSER: - return "s_cbranch_cdbguser"; - case Sopp::Op::S_CBRANCH_CDBGSYS_OR_USER: - return "s_cbranch_cdbgsys_or_user"; - case Sopp::Op::S_CBRANCH_CDBGSYS_AND_USER: - return "s_cbranch_cdbgsys_and_user"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::vop2OpcodeToString(Vop2::Op op) { - switch (op) { - case Vop2::Op::V_CNDMASK_B32: - return "v_cndmask_b32"; - case Vop2::Op::V_READLANE_B32: - return "v_readlane_b32"; - case Vop2::Op::V_WRITELANE_B32: - return "v_writelane_b32"; - case Vop2::Op::V_ADD_F32: - return "v_add_f32"; - case Vop2::Op::V_SUB_F32: - return "v_sub_f32"; - case Vop2::Op::V_SUBREV_F32: - return "v_subrev_f32"; - case Vop2::Op::V_MAC_LEGACY_F32: - return "v_mac_legacy_f32"; - case Vop2::Op::V_MUL_LEGACY_F32: - return "v_mul_legacy_f32"; - case Vop2::Op::V_MUL_F32: - return "v_mul_f32"; - case Vop2::Op::V_MUL_I32_I24: - return "v_mul_i32_i24"; - case Vop2::Op::V_MUL_HI_I32_I24: - return "v_mul_hi_i32_i24"; - case Vop2::Op::V_MUL_U32_U24: - return "v_mul_u32_u24"; - case Vop2::Op::V_MUL_HI_U32_U24: - return "v_mul_hi_u32_u24"; - case Vop2::Op::V_MIN_LEGACY_F32: - return "v_min_legacy_f32"; - case Vop2::Op::V_MAX_LEGACY_F32: - return "v_max_legacy_f32"; - case Vop2::Op::V_MIN_F32: - return "v_min_f32"; - case Vop2::Op::V_MAX_F32: - return "v_max_f32"; - case Vop2::Op::V_MIN_I32: - return "v_min_i32"; - case Vop2::Op::V_MAX_I32: - return "v_max_i32"; - case Vop2::Op::V_MIN_U32: - return "v_min_u32"; - case Vop2::Op::V_MAX_U32: - return "v_max_u32"; - case Vop2::Op::V_LSHR_B32: - return "v_lshr_b32"; - case Vop2::Op::V_LSHRREV_B32: - return "v_lshrrev_b32"; - case Vop2::Op::V_ASHR_I32: - return "v_ashr_i32"; - case Vop2::Op::V_ASHRREV_I32: - return "v_ashrrev_i32"; - case Vop2::Op::V_LSHL_B32: - return "v_lshl_b32"; - case Vop2::Op::V_LSHLREV_B32: - return "v_lshlrev_b32"; - case Vop2::Op::V_AND_B32: - return "v_and_b32"; - case Vop2::Op::V_OR_B32: - return "v_or_b32"; - case Vop2::Op::V_XOR_B32: - return "v_xor_b32"; - case Vop2::Op::V_BFM_B32: - return "v_bfm_b32"; - case Vop2::Op::V_MAC_F32: - return "v_mac_f32"; - case Vop2::Op::V_MADMK_F32: - return "v_madmk_f32"; - case Vop2::Op::V_MADAK_F32: - return "v_madak_f32"; - case Vop2::Op::V_BCNT_U32_B32: - return "v_bcnt_u32_b32"; - case Vop2::Op::V_MBCNT_LO_U32_B32: - return "v_mbcnt_lo_u32_b32"; - case Vop2::Op::V_MBCNT_HI_U32_B32: - return "v_mbcnt_hi_u32_b32"; - case Vop2::Op::V_ADD_I32: - return "v_add_i32"; - case Vop2::Op::V_SUB_I32: - return "v_sub_i32"; - case Vop2::Op::V_SUBREV_I32: - return "v_subrev_i32"; - case Vop2::Op::V_ADDC_U32: - return "v_addc_u32"; - case Vop2::Op::V_SUBB_U32: - return "v_subb_u32"; - case Vop2::Op::V_SUBBREV_U32: - return "v_subbrev_u32"; - case Vop2::Op::V_LDEXP_F32: - return "v_ldexp_f32"; - case Vop2::Op::V_CVT_PKACCUM_U8_F32: - return "v_cvt_pkaccum_u8_f32"; - case Vop2::Op::V_CVT_PKNORM_I16_F32: - return "v_cvt_pknorm_i16_f32"; - case Vop2::Op::V_CVT_PKNORM_U16_F32: - return "v_cvt_pknorm_u16_f32"; - case Vop2::Op::V_CVT_PKRTZ_F16_F32: - return "v_cvt_pkrtz_f16_f32"; - case Vop2::Op::V_CVT_PK_U16_U32: - return "v_cvt_pk_u16_u32"; - case Vop2::Op::V_CVT_PK_I16_I32: - return "v_cvt_pk_i16_i32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::vop1OpcodeToString(Vop1::Op op) { - switch (op) { - case Vop1::Op::V_NOP: - return "v_nop"; - case Vop1::Op::V_MOV_B32: - return "v_mov_b32"; - case Vop1::Op::V_READFIRSTLANE_B32: - return "v_readfirstlane_b32"; - case Vop1::Op::V_CVT_I32_F64: - return "v_cvt_i32_f64"; - case Vop1::Op::V_CVT_F64_I32: - return "v_cvt_f64_i32"; - case Vop1::Op::V_CVT_F32_I32: - return "v_cvt_f32_i32"; - case Vop1::Op::V_CVT_F32_U32: - return "v_cvt_f32_u32"; - case Vop1::Op::V_CVT_U32_F32: - return "v_cvt_u32_f32"; - case Vop1::Op::V_CVT_I32_F32: - return "v_cvt_i32_f32"; - case Vop1::Op::V_MOV_FED_B32: - return "v_mov_fed_b32"; - case Vop1::Op::V_CVT_F16_F32: - return "v_cvt_f16_f32"; - case Vop1::Op::V_CVT_F32_F16: - return "v_cvt_f32_f16"; - case Vop1::Op::V_CVT_RPI_I32_F32: - return "v_cvt_rpi_i32_f32"; - case Vop1::Op::V_CVT_FLR_I32_F32: - return "v_cvt_flr_i32_f32"; - case Vop1::Op::V_CVT_OFF_F32_I4: - return "v_cvt_off_f32_i4"; - case Vop1::Op::V_CVT_F32_F64: - return "v_cvt_f32_f64"; - case Vop1::Op::V_CVT_F64_F32: - return "v_cvt_f64_f32"; - case Vop1::Op::V_CVT_F32_UBYTE0: - return "v_cvt_f32_ubyte0"; - case Vop1::Op::V_CVT_F32_UBYTE1: - return "v_cvt_f32_ubyte1"; - case Vop1::Op::V_CVT_F32_UBYTE2: - return "v_cvt_f32_ubyte2"; - case Vop1::Op::V_CVT_F32_UBYTE3: - return "v_cvt_f32_ubyte3"; - case Vop1::Op::V_CVT_U32_F64: - return "v_cvt_u32_f64"; - case Vop1::Op::V_CVT_F64_U32: - return "v_cvt_f64_u32"; - case Vop1::Op::V_FRACT_F32: - return "v_fract_f32"; - case Vop1::Op::V_TRUNC_F32: - return "v_trunc_f32"; - case Vop1::Op::V_CEIL_F32: - return "v_ceil_f32"; - case Vop1::Op::V_RNDNE_F32: - return "v_rndne_f32"; - case Vop1::Op::V_FLOOR_F32: - return "v_floor_f32"; - case Vop1::Op::V_EXP_F32: - return "v_exp_f32"; - case Vop1::Op::V_LOG_CLAMP_F32: - return "v_log_clamp_f32"; - case Vop1::Op::V_LOG_F32: - return "v_log_f32"; - case Vop1::Op::V_RCP_CLAMP_F32: - return "v_rcp_clamp_f32"; - case Vop1::Op::V_RCP_LEGACY_F32: - return "v_rcp_legacy_f32"; - case Vop1::Op::V_RCP_F32: - return "v_rcp_f32"; - case Vop1::Op::V_RCP_IFLAG_F32: - return "v_rcp_iflag_f32"; - case Vop1::Op::V_RSQ_CLAMP_F32: - return "v_rsq_clamp_f32"; - case Vop1::Op::V_RSQ_LEGACY_F32: - return "v_rsq_legacy_f32"; - case Vop1::Op::V_RSQ_F32: - return "v_rsq_f32"; - case Vop1::Op::V_RCP_F64: - return "v_rcp_f64"; - case Vop1::Op::V_RCP_CLAMP_F64: - return "v_rcp_clamp_f64"; - case Vop1::Op::V_RSQ_F64: - return "v_rsq_f64"; - case Vop1::Op::V_RSQ_CLAMP_F64: - return "v_rsq_clamp_f64"; - case Vop1::Op::V_SQRT_F32: - return "v_sqrt_f32"; - case Vop1::Op::V_SQRT_F64: - return "v_sqrt_f64"; - case Vop1::Op::V_SIN_F32: - return "v_sin_f32"; - case Vop1::Op::V_COS_F32: - return "v_cos_f32"; - case Vop1::Op::V_NOT_B32: - return "v_not_b32"; - case Vop1::Op::V_BFREV_B32: - return "v_bfrev_b32"; - case Vop1::Op::V_FFBH_U32: - return "v_ffbh_u32"; - case Vop1::Op::V_FFBL_B32: - return "v_ffbl_b32"; - case Vop1::Op::V_FFBH_I32: - return "v_ffbh_i32"; - case Vop1::Op::V_FREXP_EXP_I32_F64: - return "v_frexp_exp_i32_f64"; - case Vop1::Op::V_FREXP_MANT_F64: - return "v_frexp_mant_f64"; - case Vop1::Op::V_FRACT_F64: - return "v_fract_f64"; - case Vop1::Op::V_FREXP_EXP_I32_F32: - return "v_frexp_exp_i32_f32"; - case Vop1::Op::V_FREXP_MANT_F32: - return "v_frexp_mant_f32"; - case Vop1::Op::V_CLREXCP: - return "v_clrexcp"; - case Vop1::Op::V_MOVRELD_B32: - return "v_movreld_b32"; - case Vop1::Op::V_MOVRELS_B32: - return "v_movrels_b32"; - case Vop1::Op::V_MOVRELSD_B32: - return "v_movrelsd_b32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::vopcOpcodeToString(Vopc::Op op) { - switch (op) { - case Vopc::Op::V_CMP_F_F32: - return "v_cmp_f_f32"; - case Vopc::Op::V_CMP_LT_F32: - return "v_cmp_lt_f32"; - case Vopc::Op::V_CMP_EQ_F32: - return "v_cmp_eq_f32"; - case Vopc::Op::V_CMP_LE_F32: - return "v_cmp_le_f32"; - case Vopc::Op::V_CMP_GT_F32: - return "v_cmp_gt_f32"; - case Vopc::Op::V_CMP_LG_F32: - return "v_cmp_lg_f32"; - case Vopc::Op::V_CMP_GE_F32: - return "v_cmp_ge_f32"; - case Vopc::Op::V_CMP_O_F32: - return "v_cmp_o_f32"; - case Vopc::Op::V_CMP_U_F32: - return "v_cmp_u_f32"; - case Vopc::Op::V_CMP_NGE_F32: - return "v_cmp_nge_f32"; - case Vopc::Op::V_CMP_NLG_F32: - return "v_cmp_nlg_f32"; - case Vopc::Op::V_CMP_NGT_F32: - return "v_cmp_ngt_f32"; - case Vopc::Op::V_CMP_NLE_F32: - return "v_cmp_nle_f32"; - case Vopc::Op::V_CMP_NEQ_F32: - return "v_cmp_neq_f32"; - case Vopc::Op::V_CMP_NLT_F32: - return "v_cmp_nlt_f32"; - case Vopc::Op::V_CMP_TRU_F32: - return "v_cmp_tru_f32"; - case Vopc::Op::V_CMPX_F_F32: - return "v_cmpx_f_f32"; - case Vopc::Op::V_CMPX_LT_F32: - return "v_cmpx_lt_f32"; - case Vopc::Op::V_CMPX_EQ_F32: - return "v_cmpx_eq_f32"; - case Vopc::Op::V_CMPX_LE_F32: - return "v_cmpx_le_f32"; - case Vopc::Op::V_CMPX_GT_F32: - return "v_cmpx_gt_f32"; - case Vopc::Op::V_CMPX_LG_F32: - return "v_cmpx_lg_f32"; - case Vopc::Op::V_CMPX_GE_F32: - return "v_cmpx_ge_f32"; - case Vopc::Op::V_CMPX_O_F32: - return "v_cmpx_o_f32"; - case Vopc::Op::V_CMPX_U_F32: - return "v_cmpx_u_f32"; - case Vopc::Op::V_CMPX_NGE_F32: - return "v_cmpx_nge_f32"; - case Vopc::Op::V_CMPX_NLG_F32: - return "v_cmpx_nlg_f32"; - case Vopc::Op::V_CMPX_NGT_F32: - return "v_cmpx_ngt_f32"; - case Vopc::Op::V_CMPX_NLE_F32: - return "v_cmpx_nle_f32"; - case Vopc::Op::V_CMPX_NEQ_F32: - return "v_cmpx_neq_f32"; - case Vopc::Op::V_CMPX_NLT_F32: - return "v_cmpx_nlt_f32"; - case Vopc::Op::V_CMPX_TRU_F32: - return "v_cmpx_tru_f32"; - case Vopc::Op::V_CMP_F_F64: - return "v_cmp_f_f64"; - case Vopc::Op::V_CMP_LT_F64: - return "v_cmp_lt_f64"; - case Vopc::Op::V_CMP_EQ_F64: - return "v_cmp_eq_f64"; - case Vopc::Op::V_CMP_LE_F64: - return "v_cmp_le_f64"; - case Vopc::Op::V_CMP_GT_F64: - return "v_cmp_gt_f64"; - case Vopc::Op::V_CMP_LG_F64: - return "v_cmp_lg_f64"; - case Vopc::Op::V_CMP_GE_F64: - return "v_cmp_ge_f64"; - case Vopc::Op::V_CMP_O_F64: - return "v_cmp_o_f64"; - case Vopc::Op::V_CMP_U_F64: - return "v_cmp_u_f64"; - case Vopc::Op::V_CMP_NGE_F64: - return "v_cmp_nge_f64"; - case Vopc::Op::V_CMP_NLG_F64: - return "v_cmp_nlg_f64"; - case Vopc::Op::V_CMP_NGT_F64: - return "v_cmp_ngt_f64"; - case Vopc::Op::V_CMP_NLE_F64: - return "v_cmp_nle_f64"; - case Vopc::Op::V_CMP_NEQ_F64: - return "v_cmp_neq_f64"; - case Vopc::Op::V_CMP_NLT_F64: - return "v_cmp_nlt_f64"; - case Vopc::Op::V_CMP_TRU_F64: - return "v_cmp_tru_f64"; - case Vopc::Op::V_CMPX_F_F64: - return "v_cmpx_f_f64"; - case Vopc::Op::V_CMPX_LT_F64: - return "v_cmpx_lt_f64"; - case Vopc::Op::V_CMPX_EQ_F64: - return "v_cmpx_eq_f64"; - case Vopc::Op::V_CMPX_LE_F64: - return "v_cmpx_le_f64"; - case Vopc::Op::V_CMPX_GT_F64: - return "v_cmpx_gt_f64"; - case Vopc::Op::V_CMPX_LG_F64: - return "v_cmpx_lg_f64"; - case Vopc::Op::V_CMPX_GE_F64: - return "v_cmpx_ge_f64"; - case Vopc::Op::V_CMPX_O_F64: - return "v_cmpx_o_f64"; - case Vopc::Op::V_CMPX_U_F64: - return "v_cmpx_u_f64"; - case Vopc::Op::V_CMPX_NGE_F64: - return "v_cmpx_nge_f64"; - case Vopc::Op::V_CMPX_NLG_F64: - return "v_cmpx_nlg_f64"; - case Vopc::Op::V_CMPX_NGT_F64: - return "v_cmpx_ngt_f64"; - case Vopc::Op::V_CMPX_NLE_F64: - return "v_cmpx_nle_f64"; - case Vopc::Op::V_CMPX_NEQ_F64: - return "v_cmpx_neq_f64"; - case Vopc::Op::V_CMPX_NLT_F64: - return "v_cmpx_nlt_f64"; - case Vopc::Op::V_CMPX_TRU_F64: - return "v_cmpx_tru_f64"; - case Vopc::Op::V_CMPS_F_F32: - return "v_cmps_f_f32"; - case Vopc::Op::V_CMPS_LT_F32: - return "v_cmps_lt_f32"; - case Vopc::Op::V_CMPS_EQ_F32: - return "v_cmps_eq_f32"; - case Vopc::Op::V_CMPS_LE_F32: - return "v_cmps_le_f32"; - case Vopc::Op::V_CMPS_GT_F32: - return "v_cmps_gt_f32"; - case Vopc::Op::V_CMPS_LG_F32: - return "v_cmps_lg_f32"; - case Vopc::Op::V_CMPS_GE_F32: - return "v_cmps_ge_f32"; - case Vopc::Op::V_CMPS_O_F32: - return "v_cmps_o_f32"; - case Vopc::Op::V_CMPS_U_F32: - return "v_cmps_u_f32"; - case Vopc::Op::V_CMPS_NGE_F32: - return "v_cmps_nge_f32"; - case Vopc::Op::V_CMPS_NLG_F32: - return "v_cmps_nlg_f32"; - case Vopc::Op::V_CMPS_NGT_F32: - return "v_cmps_ngt_f32"; - case Vopc::Op::V_CMPS_NLE_F32: - return "v_cmps_nle_f32"; - case Vopc::Op::V_CMPS_NEQ_F32: - return "v_cmps_neq_f32"; - case Vopc::Op::V_CMPS_NLT_F32: - return "v_cmps_nlt_f32"; - case Vopc::Op::V_CMPS_TRU_F32: - return "v_cmps_tru_f32"; - case Vopc::Op::V_CMPSX_F_F32: - return "v_cmpsx_f_f32"; - case Vopc::Op::V_CMPSX_LT_F32: - return "v_cmpsx_lt_f32"; - case Vopc::Op::V_CMPSX_EQ_F32: - return "v_cmpsx_eq_f32"; - case Vopc::Op::V_CMPSX_LE_F32: - return "v_cmpsx_le_f32"; - case Vopc::Op::V_CMPSX_GT_F32: - return "v_cmpsx_gt_f32"; - case Vopc::Op::V_CMPSX_LG_F32: - return "v_cmpsx_lg_f32"; - case Vopc::Op::V_CMPSX_GE_F32: - return "v_cmpsx_ge_f32"; - case Vopc::Op::V_CMPSX_O_F32: - return "v_cmpsx_o_f32"; - case Vopc::Op::V_CMPSX_U_F32: - return "v_cmpsx_u_f32"; - case Vopc::Op::V_CMPSX_NGE_F32: - return "v_cmpsx_nge_f32"; - case Vopc::Op::V_CMPSX_NLG_F32: - return "v_cmpsx_nlg_f32"; - case Vopc::Op::V_CMPSX_NGT_F32: - return "v_cmpsx_ngt_f32"; - case Vopc::Op::V_CMPSX_NLE_F32: - return "v_cmpsx_nle_f32"; - case Vopc::Op::V_CMPSX_NEQ_F32: - return "v_cmpsx_neq_f32"; - case Vopc::Op::V_CMPSX_NLT_F32: - return "v_cmpsx_nlt_f32"; - case Vopc::Op::V_CMPSX_TRU_F32: - return "v_cmpsx_tru_f32"; - case Vopc::Op::V_CMPS_F_F64: - return "v_cmps_f_f64"; - case Vopc::Op::V_CMPS_LT_F64: - return "v_cmps_lt_f64"; - case Vopc::Op::V_CMPS_EQ_F64: - return "v_cmps_eq_f64"; - case Vopc::Op::V_CMPS_LE_F64: - return "v_cmps_le_f64"; - case Vopc::Op::V_CMPS_GT_F64: - return "v_cmps_gt_f64"; - case Vopc::Op::V_CMPS_LG_F64: - return "v_cmps_lg_f64"; - case Vopc::Op::V_CMPS_GE_F64: - return "v_cmps_ge_f64"; - case Vopc::Op::V_CMPS_O_F64: - return "v_cmps_o_f64"; - case Vopc::Op::V_CMPS_U_F64: - return "v_cmps_u_f64"; - case Vopc::Op::V_CMPS_NGE_F64: - return "v_cmps_nge_f64"; - case Vopc::Op::V_CMPS_NLG_F64: - return "v_cmps_nlg_f64"; - case Vopc::Op::V_CMPS_NGT_F64: - return "v_cmps_ngt_f64"; - case Vopc::Op::V_CMPS_NLE_F64: - return "v_cmps_nle_f64"; - case Vopc::Op::V_CMPS_NEQ_F64: - return "v_cmps_neq_f64"; - case Vopc::Op::V_CMPS_NLT_F64: - return "v_cmps_nlt_f64"; - case Vopc::Op::V_CMPS_TRU_F64: - return "v_cmps_tru_f64"; - case Vopc::Op::V_CMPSX_F_F64: - return "v_cmpsx_f_f64"; - case Vopc::Op::V_CMPSX_LT_F64: - return "v_cmpsx_lt_f64"; - case Vopc::Op::V_CMPSX_EQ_F64: - return "v_cmpsx_eq_f64"; - case Vopc::Op::V_CMPSX_LE_F64: - return "v_cmpsx_le_f64"; - case Vopc::Op::V_CMPSX_GT_F64: - return "v_cmpsx_gt_f64"; - case Vopc::Op::V_CMPSX_LG_F64: - return "v_cmpsx_lg_f64"; - case Vopc::Op::V_CMPSX_GE_F64: - return "v_cmpsx_ge_f64"; - case Vopc::Op::V_CMPSX_O_F64: - return "v_cmpsx_o_f64"; - case Vopc::Op::V_CMPSX_U_F64: - return "v_cmpsx_u_f64"; - case Vopc::Op::V_CMPSX_NGE_F64: - return "v_cmpsx_nge_f64"; - case Vopc::Op::V_CMPSX_NLG_F64: - return "v_cmpsx_nlg_f64"; - case Vopc::Op::V_CMPSX_NGT_F64: - return "v_cmpsx_ngt_f64"; - case Vopc::Op::V_CMPSX_NLE_F64: - return "v_cmpsx_nle_f64"; - case Vopc::Op::V_CMPSX_NEQ_F64: - return "v_cmpsx_neq_f64"; - case Vopc::Op::V_CMPSX_NLT_F64: - return "v_cmpsx_nlt_f64"; - case Vopc::Op::V_CMPSX_TRU_F64: - return "v_cmpsx_tru_f64"; - case Vopc::Op::V_CMP_F_I32: - return "v_cmp_f_i32"; - case Vopc::Op::V_CMP_LT_I32: - return "v_cmp_lt_i32"; - case Vopc::Op::V_CMP_EQ_I32: - return "v_cmp_eq_i32"; - case Vopc::Op::V_CMP_LE_I32: - return "v_cmp_le_i32"; - case Vopc::Op::V_CMP_GT_I32: - return "v_cmp_gt_i32"; - case Vopc::Op::V_CMP_NE_I32: - return "v_cmp_ne_i32"; - case Vopc::Op::V_CMP_GE_I32: - return "v_cmp_ge_i32"; - case Vopc::Op::V_CMP_T_I32: - return "v_cmp_t_i32"; - case Vopc::Op::V_CMP_CLASS_F32: - return "v_cmp_class_f32"; - case Vopc::Op::V_CMP_LT_I16: - return "v_cmp_lt_i16"; - case Vopc::Op::V_CMP_EQ_I16: - return "v_cmp_eq_i16"; - case Vopc::Op::V_CMP_LE_I16: - return "v_cmp_le_i16"; - case Vopc::Op::V_CMP_GT_I16: - return "v_cmp_gt_i16"; - case Vopc::Op::V_CMP_NE_I16: - return "v_cmp_ne_i16"; - case Vopc::Op::V_CMP_GE_I16: - return "v_cmp_ge_i16"; - case Vopc::Op::V_CMP_CLASS_F16: - return "v_cmp_class_f16"; - case Vopc::Op::V_CMPX_F_I32: - return "v_cmpx_f_i32"; - case Vopc::Op::V_CMPX_LT_I32: - return "v_cmpx_lt_i32"; - case Vopc::Op::V_CMPX_EQ_I32: - return "v_cmpx_eq_i32"; - case Vopc::Op::V_CMPX_LE_I32: - return "v_cmpx_le_i32"; - case Vopc::Op::V_CMPX_GT_I32: - return "v_cmpx_gt_i32"; - case Vopc::Op::V_CMPX_NE_I32: - return "v_cmpx_ne_i32"; - case Vopc::Op::V_CMPX_GE_I32: - return "v_cmpx_ge_i32"; - case Vopc::Op::V_CMPX_T_I32: - return "v_cmpx_t_i32"; - case Vopc::Op::V_CMPX_CLASS_F32: - return "v_cmpx_class_f32"; - case Vopc::Op::V_CMPX_LT_I16: - return "v_cmpx_lt_i16"; - case Vopc::Op::V_CMPX_EQ_I16: - return "v_cmpx_eq_i16"; - case Vopc::Op::V_CMPX_LE_I16: - return "v_cmpx_le_i16"; - case Vopc::Op::V_CMPX_GT_I16: - return "v_cmpx_gt_i16"; - case Vopc::Op::V_CMPX_NE_I16: - return "v_cmpx_ne_i16"; - case Vopc::Op::V_CMPX_GE_I16: - return "v_cmpx_ge_i16"; - case Vopc::Op::V_CMPX_CLASS_F16: - return "v_cmpx_class_f16"; - case Vopc::Op::V_CMP_F_I64: - return "v_cmp_f_i64"; - case Vopc::Op::V_CMP_LT_I64: - return "v_cmp_lt_i64"; - case Vopc::Op::V_CMP_EQ_I64: - return "v_cmp_eq_i64"; - case Vopc::Op::V_CMP_LE_I64: - return "v_cmp_le_i64"; - case Vopc::Op::V_CMP_GT_I64: - return "v_cmp_gt_i64"; - case Vopc::Op::V_CMP_NE_I64: - return "v_cmp_ne_i64"; - case Vopc::Op::V_CMP_GE_I64: - return "v_cmp_ge_i64"; - case Vopc::Op::V_CMP_T_I64: - return "v_cmp_t_i64"; - case Vopc::Op::V_CMP_CLASS_F64: - return "v_cmp_class_f64"; - case Vopc::Op::V_CMP_LT_U16: - return "v_cmp_lt_u16"; - case Vopc::Op::V_CMP_EQ_U16: - return "v_cmp_eq_u16"; - case Vopc::Op::V_CMP_LE_U16: - return "v_cmp_le_u16"; - case Vopc::Op::V_CMP_GT_U16: - return "v_cmp_gt_u16"; - case Vopc::Op::V_CMP_NE_U16: - return "v_cmp_ne_u16"; - case Vopc::Op::V_CMP_GE_U16: - return "v_cmp_ge_u16"; - case Vopc::Op::V_CMPX_F_I64: - return "v_cmpx_f_i64"; - case Vopc::Op::V_CMPX_LT_I64: - return "v_cmpx_lt_i64"; - case Vopc::Op::V_CMPX_EQ_I64: - return "v_cmpx_eq_i64"; - case Vopc::Op::V_CMPX_LE_I64: - return "v_cmpx_le_i64"; - case Vopc::Op::V_CMPX_GT_I64: - return "v_cmpx_gt_i64"; - case Vopc::Op::V_CMPX_NE_I64: - return "v_cmpx_ne_i64"; - case Vopc::Op::V_CMPX_GE_I64: - return "v_cmpx_ge_i64"; - case Vopc::Op::V_CMPX_T_I64: - return "v_cmpx_t_i64"; - case Vopc::Op::V_CMPX_CLASS_F64: - return "v_cmpx_class_f64"; - case Vopc::Op::V_CMPX_LT_U16: - return "v_cmpx_lt_u16"; - case Vopc::Op::V_CMPX_EQ_U16: - return "v_cmpx_eq_u16"; - case Vopc::Op::V_CMPX_LE_U16: - return "v_cmpx_le_u16"; - case Vopc::Op::V_CMPX_GT_U16: - return "v_cmpx_gt_u16"; - case Vopc::Op::V_CMPX_NE_U16: - return "v_cmpx_ne_u16"; - case Vopc::Op::V_CMPX_GE_U16: - return "v_cmpx_ge_u16"; - case Vopc::Op::V_CMP_F_U32: - return "v_cmp_f_u32"; - case Vopc::Op::V_CMP_LT_U32: - return "v_cmp_lt_u32"; - case Vopc::Op::V_CMP_EQ_U32: - return "v_cmp_eq_u32"; - case Vopc::Op::V_CMP_LE_U32: - return "v_cmp_le_u32"; - case Vopc::Op::V_CMP_GT_U32: - return "v_cmp_gt_u32"; - case Vopc::Op::V_CMP_NE_U32: - return "v_cmp_ne_u32"; - case Vopc::Op::V_CMP_GE_U32: - return "v_cmp_ge_u32"; - case Vopc::Op::V_CMP_T_U32: - return "v_cmp_t_u32"; - case Vopc::Op::V_CMP_F_F16: - return "v_cmp_f_f16"; - case Vopc::Op::V_CMP_LT_F16: - return "v_cmp_lt_f16"; - case Vopc::Op::V_CMP_EQ_F16: - return "v_cmp_eq_f16"; - case Vopc::Op::V_CMP_LE_F16: - return "v_cmp_le_f16"; - case Vopc::Op::V_CMP_GT_F16: - return "v_cmp_gt_f16"; - case Vopc::Op::V_CMP_LG_F16: - return "v_cmp_lg_f16"; - case Vopc::Op::V_CMP_GE_F16: - return "v_cmp_ge_f16"; - case Vopc::Op::V_CMP_O_F16: - return "v_cmp_o_f16"; - case Vopc::Op::V_CMPX_F_U32: - return "v_cmpx_f_u32"; - case Vopc::Op::V_CMPX_LT_U32: - return "v_cmpx_lt_u32"; - case Vopc::Op::V_CMPX_EQ_U32: - return "v_cmpx_eq_u32"; - case Vopc::Op::V_CMPX_LE_U32: - return "v_cmpx_le_u32"; - case Vopc::Op::V_CMPX_GT_U32: - return "v_cmpx_gt_u32"; - case Vopc::Op::V_CMPX_NE_U32: - return "v_cmpx_ne_u32"; - case Vopc::Op::V_CMPX_GE_U32: - return "v_cmpx_ge_u32"; - case Vopc::Op::V_CMPX_T_U32: - return "v_cmpx_t_u32"; - case Vopc::Op::V_CMPX_F_F16: - return "v_cmpx_f_f16"; - case Vopc::Op::V_CMPX_LT_F16: - return "v_cmpx_lt_f16"; - case Vopc::Op::V_CMPX_EQ_F16: - return "v_cmpx_eq_f16"; - case Vopc::Op::V_CMPX_LE_F16: - return "v_cmpx_le_f16"; - case Vopc::Op::V_CMPX_GT_F16: - return "v_cmpx_gt_f16"; - case Vopc::Op::V_CMPX_LG_F16: - return "v_cmpx_lg_f16"; - case Vopc::Op::V_CMPX_GE_F16: - return "v_cmpx_ge_f16"; - case Vopc::Op::V_CMPX_O_F16: - return "v_cmpx_o_f16"; - case Vopc::Op::V_CMP_F_U64: - return "v_cmp_f_u64"; - case Vopc::Op::V_CMP_LT_U64: - return "v_cmp_lt_u64"; - case Vopc::Op::V_CMP_EQ_U64: - return "v_cmp_eq_u64"; - case Vopc::Op::V_CMP_LE_U64: - return "v_cmp_le_u64"; - case Vopc::Op::V_CMP_GT_U64: - return "v_cmp_gt_u64"; - case Vopc::Op::V_CMP_NE_U64: - return "v_cmp_ne_u64"; - case Vopc::Op::V_CMP_GE_U64: - return "v_cmp_ge_u64"; - case Vopc::Op::V_CMP_T_U64: - return "v_cmp_t_u64"; - case Vopc::Op::V_CMP_U_F16: - return "v_cmp_u_f16"; - case Vopc::Op::V_CMP_NGE_F16: - return "v_cmp_nge_f16"; - case Vopc::Op::V_CMP_NLG_F16: - return "v_cmp_nlg_f16"; - case Vopc::Op::V_CMP_NGT_F16: - return "v_cmp_ngt_f16"; - case Vopc::Op::V_CMP_NLE_F16: - return "v_cmp_nle_f16"; - case Vopc::Op::V_CMP_NEQ_F16: - return "v_cmp_neq_f16"; - case Vopc::Op::V_CMP_NLT_F16: - return "v_cmp_nlt_f16"; - case Vopc::Op::V_CMP_TRU_F16: - return "v_cmp_tru_f16"; - case Vopc::Op::V_CMPX_F_U64: - return "v_cmpx_f_u64"; - case Vopc::Op::V_CMPX_LT_U64: - return "v_cmpx_lt_u64"; - case Vopc::Op::V_CMPX_EQ_U64: - return "v_cmpx_eq_u64"; - case Vopc::Op::V_CMPX_LE_U64: - return "v_cmpx_le_u64"; - case Vopc::Op::V_CMPX_GT_U64: - return "v_cmpx_gt_u64"; - case Vopc::Op::V_CMPX_NE_U64: - return "v_cmpx_ne_u64"; - case Vopc::Op::V_CMPX_GE_U64: - return "v_cmpx_ge_u64"; - case Vopc::Op::V_CMPX_T_U64: - return "v_cmpx_t_u64"; - case Vopc::Op::V_CMPX_U_F16: - return "v_cmpx_u_f16"; - case Vopc::Op::V_CMPX_NGE_F16: - return "v_cmpx_nge_f16"; - case Vopc::Op::V_CMPX_NLG_F16: - return "v_cmpx_nlg_f16"; - case Vopc::Op::V_CMPX_NGT_F16: - return "v_cmpx_ngt_f16"; - case Vopc::Op::V_CMPX_NLE_F16: - return "v_cmpx_nle_f16"; - case Vopc::Op::V_CMPX_NEQ_F16: - return "v_cmpx_neq_f16"; - case Vopc::Op::V_CMPX_NLT_F16: - return "v_cmpx_nlt_f16"; - case Vopc::Op::V_CMPX_TRU_F16: - return "v_cmpx_tru_f16"; - - default: - return nullptr; - } -} - -const char *amdgpu::shader::vop3OpcodeToString(Vop3::Op op) { - switch (op) { - case Vop3::Op::V3_CMP_F_F32: - return "v3_cmp_f_f32"; - case Vop3::Op::V3_CMP_LT_F32: - return "v3_cmp_lt_f32"; - case Vop3::Op::V3_CMP_EQ_F32: - return "v3_cmp_eq_f32"; - case Vop3::Op::V3_CMP_LE_F32: - return "v3_cmp_le_f32"; - case Vop3::Op::V3_CMP_GT_F32: - return "v3_cmp_gt_f32"; - case Vop3::Op::V3_CMP_LG_F32: - return "v3_cmp_lg_f32"; - case Vop3::Op::V3_CMP_GE_F32: - return "v3_cmp_ge_f32"; - case Vop3::Op::V3_CMP_O_F32: - return "v3_cmp_o_f32"; - case Vop3::Op::V3_CMP_U_F32: - return "v3_cmp_u_f32"; - case Vop3::Op::V3_CMP_NGE_F32: - return "v3_cmp_nge_f32"; - case Vop3::Op::V3_CMP_NLG_F32: - return "v3_cmp_nlg_f32"; - case Vop3::Op::V3_CMP_NGT_F32: - return "v3_cmp_ngt_f32"; - case Vop3::Op::V3_CMP_NLE_F32: - return "v3_cmp_nle_f32"; - case Vop3::Op::V3_CMP_NEQ_F32: - return "v3_cmp_neq_f32"; - case Vop3::Op::V3_CMP_NLT_F32: - return "v3_cmp_nlt_f32"; - case Vop3::Op::V3_CMP_TRU_F32: - return "v3_cmp_tru_f32"; - case Vop3::Op::V3_CMPX_F_F32: - return "v3_cmpx_f_f32"; - case Vop3::Op::V3_CMPX_LT_F32: - return "v3_cmpx_lt_f32"; - case Vop3::Op::V3_CMPX_EQ_F32: - return "v3_cmpx_eq_f32"; - case Vop3::Op::V3_CMPX_LE_F32: - return "v3_cmpx_le_f32"; - case Vop3::Op::V3_CMPX_GT_F32: - return "v3_cmpx_gt_f32"; - case Vop3::Op::V3_CMPX_LG_F32: - return "v3_cmpx_lg_f32"; - case Vop3::Op::V3_CMPX_GE_F32: - return "v3_cmpx_ge_f32"; - case Vop3::Op::V3_CMPX_O_F32: - return "v3_cmpx_o_f32"; - case Vop3::Op::V3_CMPX_U_F32: - return "v3_cmpx_u_f32"; - case Vop3::Op::V3_CMPX_NGE_F32: - return "v3_cmpx_nge_f32"; - case Vop3::Op::V3_CMPX_NLG_F32: - return "v3_cmpx_nlg_f32"; - case Vop3::Op::V3_CMPX_NGT_F32: - return "v3_cmpx_ngt_f32"; - case Vop3::Op::V3_CMPX_NLE_F32: - return "v3_cmpx_nle_f32"; - case Vop3::Op::V3_CMPX_NEQ_F32: - return "v3_cmpx_neq_f32"; - case Vop3::Op::V3_CMPX_NLT_F32: - return "v3_cmpx_nlt_f32"; - case Vop3::Op::V3_CMPX_TRU_F32: - return "v3_cmpx_tru_f32"; - case Vop3::Op::V3_CMP_F_F64: - return "v3_cmp_f_f64"; - case Vop3::Op::V3_CMP_LT_F64: - return "v3_cmp_lt_f64"; - case Vop3::Op::V3_CMP_EQ_F64: - return "v3_cmp_eq_f64"; - case Vop3::Op::V3_CMP_LE_F64: - return "v3_cmp_le_f64"; - case Vop3::Op::V3_CMP_GT_F64: - return "v3_cmp_gt_f64"; - case Vop3::Op::V3_CMP_LG_F64: - return "v3_cmp_lg_f64"; - case Vop3::Op::V3_CMP_GE_F64: - return "v3_cmp_ge_f64"; - case Vop3::Op::V3_CMP_O_F64: - return "v3_cmp_o_f64"; - case Vop3::Op::V3_CMP_U_F64: - return "v3_cmp_u_f64"; - case Vop3::Op::V3_CMP_NGE_F64: - return "v3_cmp_nge_f64"; - case Vop3::Op::V3_CMP_NLG_F64: - return "v3_cmp_nlg_f64"; - case Vop3::Op::V3_CMP_NGT_F64: - return "v3_cmp_ngt_f64"; - case Vop3::Op::V3_CMP_NLE_F64: - return "v3_cmp_nle_f64"; - case Vop3::Op::V3_CMP_NEQ_F64: - return "v3_cmp_neq_f64"; - case Vop3::Op::V3_CMP_NLT_F64: - return "v3_cmp_nlt_f64"; - case Vop3::Op::V3_CMP_TRU_F64: - return "v3_cmp_tru_f64"; - case Vop3::Op::V3_CMPX_F_F64: - return "v3_cmpx_f_f64"; - case Vop3::Op::V3_CMPX_LT_F64: - return "v3_cmpx_lt_f64"; - case Vop3::Op::V3_CMPX_EQ_F64: - return "v3_cmpx_eq_f64"; - case Vop3::Op::V3_CMPX_LE_F64: - return "v3_cmpx_le_f64"; - case Vop3::Op::V3_CMPX_GT_F64: - return "v3_cmpx_gt_f64"; - case Vop3::Op::V3_CMPX_LG_F64: - return "v3_cmpx_lg_f64"; - case Vop3::Op::V3_CMPX_GE_F64: - return "v3_cmpx_ge_f64"; - case Vop3::Op::V3_CMPX_O_F64: - return "v3_cmpx_o_f64"; - case Vop3::Op::V3_CMPX_U_F64: - return "v3_cmpx_u_f64"; - case Vop3::Op::V3_CMPX_NGE_F64: - return "v3_cmpx_nge_f64"; - case Vop3::Op::V3_CMPX_NLG_F64: - return "v3_cmpx_nlg_f64"; - case Vop3::Op::V3_CMPX_NGT_F64: - return "v3_cmpx_ngt_f64"; - case Vop3::Op::V3_CMPX_NLE_F64: - return "v3_cmpx_nle_f64"; - case Vop3::Op::V3_CMPX_NEQ_F64: - return "v3_cmpx_neq_f64"; - case Vop3::Op::V3_CMPX_NLT_F64: - return "v3_cmpx_nlt_f64"; - case Vop3::Op::V3_CMPX_TRU_F64: - return "v3_cmpx_tru_f64"; - case Vop3::Op::V3_CMPS_F_F32: - return "v3_cmps_f_f32"; - case Vop3::Op::V3_CMPS_LT_F32: - return "v3_cmps_lt_f32"; - case Vop3::Op::V3_CMPS_EQ_F32: - return "v3_cmps_eq_f32"; - case Vop3::Op::V3_CMPS_LE_F32: - return "v3_cmps_le_f32"; - case Vop3::Op::V3_CMPS_GT_F32: - return "v3_cmps_gt_f32"; - case Vop3::Op::V3_CMPS_LG_F32: - return "v3_cmps_lg_f32"; - case Vop3::Op::V3_CMPS_GE_F32: - return "v3_cmps_ge_f32"; - case Vop3::Op::V3_CMPS_O_F32: - return "v3_cmps_o_f32"; - case Vop3::Op::V3_CMPS_U_F32: - return "v3_cmps_u_f32"; - case Vop3::Op::V3_CMPS_NGE_F32: - return "v3_cmps_nge_f32"; - case Vop3::Op::V3_CMPS_NLG_F32: - return "v3_cmps_nlg_f32"; - case Vop3::Op::V3_CMPS_NGT_F32: - return "v3_cmps_ngt_f32"; - case Vop3::Op::V3_CMPS_NLE_F32: - return "v3_cmps_nle_f32"; - case Vop3::Op::V3_CMPS_NEQ_F32: - return "v3_cmps_neq_f32"; - case Vop3::Op::V3_CMPS_NLT_F32: - return "v3_cmps_nlt_f32"; - case Vop3::Op::V3_CMPS_TRU_F32: - return "v3_cmps_tru_f32"; - case Vop3::Op::V3_CMPSX_F_F32: - return "v3_cmpsx_f_f32"; - case Vop3::Op::V3_CMPSX_LT_F32: - return "v3_cmpsx_lt_f32"; - case Vop3::Op::V3_CMPSX_EQ_F32: - return "v3_cmpsx_eq_f32"; - case Vop3::Op::V3_CMPSX_LE_F32: - return "v3_cmpsx_le_f32"; - case Vop3::Op::V3_CMPSX_GT_F32: - return "v3_cmpsx_gt_f32"; - case Vop3::Op::V3_CMPSX_LG_F32: - return "v3_cmpsx_lg_f32"; - case Vop3::Op::V3_CMPSX_GE_F32: - return "v3_cmpsx_ge_f32"; - case Vop3::Op::V3_CMPSX_O_F32: - return "v3_cmpsx_o_f32"; - case Vop3::Op::V3_CMPSX_U_F32: - return "v3_cmpsx_u_f32"; - case Vop3::Op::V3_CMPSX_NGE_F32: - return "v3_cmpsx_nge_f32"; - case Vop3::Op::V3_CMPSX_NLG_F32: - return "v3_cmpsx_nlg_f32"; - case Vop3::Op::V3_CMPSX_NGT_F32: - return "v3_cmpsx_ngt_f32"; - case Vop3::Op::V3_CMPSX_NLE_F32: - return "v3_cmpsx_nle_f32"; - case Vop3::Op::V3_CMPSX_NEQ_F32: - return "v3_cmpsx_neq_f32"; - case Vop3::Op::V3_CMPSX_NLT_F32: - return "v3_cmpsx_nlt_f32"; - case Vop3::Op::V3_CMPSX_TRU_F32: - return "v3_cmpsx_tru_f32"; - case Vop3::Op::V3_CMPS_F_F64: - return "v3_cmps_f_f64"; - case Vop3::Op::V3_CMPS_LT_F64: - return "v3_cmps_lt_f64"; - case Vop3::Op::V3_CMPS_EQ_F64: - return "v3_cmps_eq_f64"; - case Vop3::Op::V3_CMPS_LE_F64: - return "v3_cmps_le_f64"; - case Vop3::Op::V3_CMPS_GT_F64: - return "v3_cmps_gt_f64"; - case Vop3::Op::V3_CMPS_LG_F64: - return "v3_cmps_lg_f64"; - case Vop3::Op::V3_CMPS_GE_F64: - return "v3_cmps_ge_f64"; - case Vop3::Op::V3_CMPS_O_F64: - return "v3_cmps_o_f64"; - case Vop3::Op::V3_CMPS_U_F64: - return "v3_cmps_u_f64"; - case Vop3::Op::V3_CMPS_NGE_F64: - return "v3_cmps_nge_f64"; - case Vop3::Op::V3_CMPS_NLG_F64: - return "v3_cmps_nlg_f64"; - case Vop3::Op::V3_CMPS_NGT_F64: - return "v3_cmps_ngt_f64"; - case Vop3::Op::V3_CMPS_NLE_F64: - return "v3_cmps_nle_f64"; - case Vop3::Op::V3_CMPS_NEQ_F64: - return "v3_cmps_neq_f64"; - case Vop3::Op::V3_CMPS_NLT_F64: - return "v3_cmps_nlt_f64"; - case Vop3::Op::V3_CMPS_TRU_F64: - return "v3_cmps_tru_f64"; - case Vop3::Op::V3_CMPSX_F_F64: - return "v3_cmpsx_f_f64"; - case Vop3::Op::V3_CMPSX_LT_F64: - return "v3_cmpsx_lt_f64"; - case Vop3::Op::V3_CMPSX_EQ_F64: - return "v3_cmpsx_eq_f64"; - case Vop3::Op::V3_CMPSX_LE_F64: - return "v3_cmpsx_le_f64"; - case Vop3::Op::V3_CMPSX_GT_F64: - return "v3_cmpsx_gt_f64"; - case Vop3::Op::V3_CMPSX_LG_F64: - return "v3_cmpsx_lg_f64"; - case Vop3::Op::V3_CMPSX_GE_F64: - return "v3_cmpsx_ge_f64"; - case Vop3::Op::V3_CMPSX_O_F64: - return "v3_cmpsx_o_f64"; - case Vop3::Op::V3_CMPSX_U_F64: - return "v3_cmpsx_u_f64"; - case Vop3::Op::V3_CMPSX_NGE_F64: - return "v3_cmpsx_nge_f64"; - case Vop3::Op::V3_CMPSX_NLG_F64: - return "v3_cmpsx_nlg_f64"; - case Vop3::Op::V3_CMPSX_NGT_F64: - return "v3_cmpsx_ngt_f64"; - case Vop3::Op::V3_CMPSX_NLE_F64: - return "v3_cmpsx_nle_f64"; - case Vop3::Op::V3_CMPSX_NEQ_F64: - return "v3_cmpsx_neq_f64"; - case Vop3::Op::V3_CMPSX_NLT_F64: - return "v3_cmpsx_nlt_f64"; - case Vop3::Op::V3_CMPSX_TRU_F64: - return "v3_cmpsx_tru_f64"; - case Vop3::Op::V3_CMP_F_I32: - return "v3_cmp_f_i32"; - case Vop3::Op::V3_CMP_LT_I32: - return "v3_cmp_lt_i32"; - case Vop3::Op::V3_CMP_EQ_I32: - return "v3_cmp_eq_i32"; - case Vop3::Op::V3_CMP_LE_I32: - return "v3_cmp_le_i32"; - case Vop3::Op::V3_CMP_GT_I32: - return "v3_cmp_gt_i32"; - case Vop3::Op::V3_CMP_NE_I32: - return "v3_cmp_ne_i32"; - case Vop3::Op::V3_CMP_GE_I32: - return "v3_cmp_ge_i32"; - case Vop3::Op::V3_CMP_T_I32: - return "v3_cmp_t_i32"; - case Vop3::Op::V3_CMP_CLASS_F32: - return "v3_cmp_class_f32"; - case Vop3::Op::V3_CMP_LT_I16: - return "v3_cmp_lt_i16"; - case Vop3::Op::V3_CMP_EQ_I16: - return "v3_cmp_eq_i16"; - case Vop3::Op::V3_CMP_LE_I16: - return "v3_cmp_le_i16"; - case Vop3::Op::V3_CMP_GT_I16: - return "v3_cmp_gt_i16"; - case Vop3::Op::V3_CMP_NE_I16: - return "v3_cmp_ne_i16"; - case Vop3::Op::V3_CMP_GE_I16: - return "v3_cmp_ge_i16"; - case Vop3::Op::V3_CMP_CLASS_F16: - return "v3_cmp_class_f16"; - case Vop3::Op::V3_CMPX_F_I32: - return "v3_cmpx_f_i32"; - case Vop3::Op::V3_CMPX_LT_I32: - return "v3_cmpx_lt_i32"; - case Vop3::Op::V3_CMPX_EQ_I32: - return "v3_cmpx_eq_i32"; - case Vop3::Op::V3_CMPX_LE_I32: - return "v3_cmpx_le_i32"; - case Vop3::Op::V3_CMPX_GT_I32: - return "v3_cmpx_gt_i32"; - case Vop3::Op::V3_CMPX_NE_I32: - return "v3_cmpx_ne_i32"; - case Vop3::Op::V3_CMPX_GE_I32: - return "v3_cmpx_ge_i32"; - case Vop3::Op::V3_CMPX_T_I32: - return "v3_cmpx_t_i32"; - case Vop3::Op::V3_CMPX_CLASS_F32: - return "v3_cmpx_class_f32"; - case Vop3::Op::V3_CMPX_LT_I16: - return "v3_cmpx_lt_i16"; - case Vop3::Op::V3_CMPX_EQ_I16: - return "v3_cmpx_eq_i16"; - case Vop3::Op::V3_CMPX_LE_I16: - return "v3_cmpx_le_i16"; - case Vop3::Op::V3_CMPX_GT_I16: - return "v3_cmpx_gt_i16"; - case Vop3::Op::V3_CMPX_NE_I16: - return "v3_cmpx_ne_i16"; - case Vop3::Op::V3_CMPX_GE_I16: - return "v3_cmpx_ge_i16"; - case Vop3::Op::V3_CMPX_CLASS_F16: - return "v3_cmpx_class_f16"; - case Vop3::Op::V3_CMP_F_I64: - return "v3_cmp_f_i64"; - case Vop3::Op::V3_CMP_LT_I64: - return "v3_cmp_lt_i64"; - case Vop3::Op::V3_CMP_EQ_I64: - return "v3_cmp_eq_i64"; - case Vop3::Op::V3_CMP_LE_I64: - return "v3_cmp_le_i64"; - case Vop3::Op::V3_CMP_GT_I64: - return "v3_cmp_gt_i64"; - case Vop3::Op::V3_CMP_NE_I64: - return "v3_cmp_ne_i64"; - case Vop3::Op::V3_CMP_GE_I64: - return "v3_cmp_ge_i64"; - case Vop3::Op::V3_CMP_T_I64: - return "v3_cmp_t_i64"; - case Vop3::Op::V3_CMP_CLASS_F64: - return "v3_cmp_class_f64"; - case Vop3::Op::V3_CMP_LT_U16: - return "v3_cmp_lt_u16"; - case Vop3::Op::V3_CMP_EQ_U16: - return "v3_cmp_eq_u16"; - case Vop3::Op::V3_CMP_LE_U16: - return "v3_cmp_le_u16"; - case Vop3::Op::V3_CMP_GT_U16: - return "v3_cmp_gt_u16"; - case Vop3::Op::V3_CMP_NE_U16: - return "v3_cmp_ne_u16"; - case Vop3::Op::V3_CMP_GE_U16: - return "v3_cmp_ge_u16"; - case Vop3::Op::V3_CMPX_F_I64: - return "v3_cmpx_f_i64"; - case Vop3::Op::V3_CMPX_LT_I64: - return "v3_cmpx_lt_i64"; - case Vop3::Op::V3_CMPX_EQ_I64: - return "v3_cmpx_eq_i64"; - case Vop3::Op::V3_CMPX_LE_I64: - return "v3_cmpx_le_i64"; - case Vop3::Op::V3_CMPX_GT_I64: - return "v3_cmpx_gt_i64"; - case Vop3::Op::V3_CMPX_NE_I64: - return "v3_cmpx_ne_i64"; - case Vop3::Op::V3_CMPX_GE_I64: - return "v3_cmpx_ge_i64"; - case Vop3::Op::V3_CMPX_T_I64: - return "v3_cmpx_t_i64"; - case Vop3::Op::V3_CMPX_CLASS_F64: - return "v3_cmpx_class_f64"; - case Vop3::Op::V3_CMPX_LT_U16: - return "v3_cmpx_lt_u16"; - case Vop3::Op::V3_CMPX_EQ_U16: - return "v3_cmpx_eq_u16"; - case Vop3::Op::V3_CMPX_LE_U16: - return "v3_cmpx_le_u16"; - case Vop3::Op::V3_CMPX_GT_U16: - return "v3_cmpx_gt_u16"; - case Vop3::Op::V3_CMPX_NE_U16: - return "v3_cmpx_ne_u16"; - case Vop3::Op::V3_CMPX_GE_U16: - return "v3_cmpx_ge_u16"; - case Vop3::Op::V3_CMP_F_U32: - return "v3_cmp_f_u32"; - case Vop3::Op::V3_CMP_LT_U32: - return "v3_cmp_lt_u32"; - case Vop3::Op::V3_CMP_EQ_U32: - return "v3_cmp_eq_u32"; - case Vop3::Op::V3_CMP_LE_U32: - return "v3_cmp_le_u32"; - case Vop3::Op::V3_CMP_GT_U32: - return "v3_cmp_gt_u32"; - case Vop3::Op::V3_CMP_NE_U32: - return "v3_cmp_ne_u32"; - case Vop3::Op::V3_CMP_GE_U32: - return "v3_cmp_ge_u32"; - case Vop3::Op::V3_CMP_T_U32: - return "v3_cmp_t_u32"; - case Vop3::Op::V3_CMP_F_F16: - return "v3_cmp_f_f16"; - case Vop3::Op::V3_CMP_LT_F16: - return "v3_cmp_lt_f16"; - case Vop3::Op::V3_CMP_EQ_F16: - return "v3_cmp_eq_f16"; - case Vop3::Op::V3_CMP_LE_F16: - return "v3_cmp_le_f16"; - case Vop3::Op::V3_CMP_GT_F16: - return "v3_cmp_gt_f16"; - case Vop3::Op::V3_CMP_LG_F16: - return "v3_cmp_lg_f16"; - case Vop3::Op::V3_CMP_GE_F16: - return "v3_cmp_ge_f16"; - case Vop3::Op::V3_CMP_O_F16: - return "v3_cmp_o_f16"; - case Vop3::Op::V3_CMPX_F_U32: - return "v3_cmpx_f_u32"; - case Vop3::Op::V3_CMPX_LT_U32: - return "v3_cmpx_lt_u32"; - case Vop3::Op::V3_CMPX_EQ_U32: - return "v3_cmpx_eq_u32"; - case Vop3::Op::V3_CMPX_LE_U32: - return "v3_cmpx_le_u32"; - case Vop3::Op::V3_CMPX_GT_U32: - return "v3_cmpx_gt_u32"; - case Vop3::Op::V3_CMPX_NE_U32: - return "v3_cmpx_ne_u32"; - case Vop3::Op::V3_CMPX_GE_U32: - return "v3_cmpx_ge_u32"; - case Vop3::Op::V3_CMPX_T_U32: - return "v3_cmpx_t_u32"; - case Vop3::Op::V3_CMPX_F_F16: - return "v3_cmpx_f_f16"; - case Vop3::Op::V3_CMPX_LT_F16: - return "v3_cmpx_lt_f16"; - case Vop3::Op::V3_CMPX_EQ_F16: - return "v3_cmpx_eq_f16"; - case Vop3::Op::V3_CMPX_LE_F16: - return "v3_cmpx_le_f16"; - case Vop3::Op::V3_CMPX_GT_F16: - return "v3_cmpx_gt_f16"; - case Vop3::Op::V3_CMPX_LG_F16: - return "v3_cmpx_lg_f16"; - case Vop3::Op::V3_CMPX_GE_F16: - return "v3_cmpx_ge_f16"; - case Vop3::Op::V3_CMPX_O_F16: - return "v3_cmpx_o_f16"; - case Vop3::Op::V3_CMP_F_U64: - return "v3_cmp_f_u64"; - case Vop3::Op::V3_CMP_LT_U64: - return "v3_cmp_lt_u64"; - case Vop3::Op::V3_CMP_EQ_U64: - return "v3_cmp_eq_u64"; - case Vop3::Op::V3_CMP_LE_U64: - return "v3_cmp_le_u64"; - case Vop3::Op::V3_CMP_GT_U64: - return "v3_cmp_gt_u64"; - case Vop3::Op::V3_CMP_NE_U64: - return "v3_cmp_ne_u64"; - case Vop3::Op::V3_CMP_GE_U64: - return "v3_cmp_ge_u64"; - case Vop3::Op::V3_CMP_T_U64: - return "v3_cmp_t_u64"; - case Vop3::Op::V3_CMP_U_F16: - return "v3_cmp_u_f16"; - case Vop3::Op::V3_CMP_NGE_F16: - return "v3_cmp_nge_f16"; - case Vop3::Op::V3_CMP_NLG_F16: - return "v3_cmp_nlg_f16"; - case Vop3::Op::V3_CMP_NGT_F16: - return "v3_cmp_ngt_f16"; - case Vop3::Op::V3_CMP_NLE_F16: - return "v3_cmp_nle_f16"; - case Vop3::Op::V3_CMP_NEQ_F16: - return "v3_cmp_neq_f16"; - case Vop3::Op::V3_CMP_NLT_F16: - return "v3_cmp_nlt_f16"; - case Vop3::Op::V3_CMP_TRU_F16: - return "v3_cmp_tru_f16"; - case Vop3::Op::V3_CMPX_F_U64: - return "v3_cmpx_f_u64"; - case Vop3::Op::V3_CMPX_LT_U64: - return "v3_cmpx_lt_u64"; - case Vop3::Op::V3_CMPX_EQ_U64: - return "v3_cmpx_eq_u64"; - case Vop3::Op::V3_CMPX_LE_U64: - return "v3_cmpx_le_u64"; - case Vop3::Op::V3_CMPX_GT_U64: - return "v3_cmpx_gt_u64"; - case Vop3::Op::V3_CMPX_NE_U64: - return "v3_cmpx_ne_u64"; - case Vop3::Op::V3_CMPX_GE_U64: - return "v3_cmpx_ge_u64"; - case Vop3::Op::V3_CMPX_T_U64: - return "v3_cmpx_t_u64"; - case Vop3::Op::V3_CNDMASK_B32: - return "v3_cndmask_b32"; - case Vop3::Op::V3_READLANE_B32: - return "v3_readlane_b32"; - case Vop3::Op::V3_WRITELANE_B32: - return "v3_writelane_b32"; - case Vop3::Op::V3_ADD_F32: - return "v3_add_f32"; - case Vop3::Op::V3_SUB_F32: - return "v3_sub_f32"; - case Vop3::Op::V3_SUBREV_F32: - return "v3_subrev_f32"; - case Vop3::Op::V3_MAC_LEGACY_F32: - return "v3_mac_legacy_f32"; - case Vop3::Op::V3_MUL_LEGACY_F32: - return "v3_mul_legacy_f32"; - case Vop3::Op::V3_MUL_F32: - return "v3_mul_f32"; - case Vop3::Op::V3_MUL_I32_I24: - return "v3_mul_i32_i24"; - case Vop3::Op::V3_MUL_HI_I32_I24: - return "v3_mul_hi_i32_i24"; - case Vop3::Op::V3_MUL_U32_U24: - return "v3_mul_u32_u24"; - case Vop3::Op::V3_MUL_HI_U32_U24: - return "v3_mul_hi_u32_u24"; - case Vop3::Op::V3_MIN_LEGACY_F32: - return "v3_min_legacy_f32"; - case Vop3::Op::V3_MAX_LEGACY_F32: - return "v3_max_legacy_f32"; - case Vop3::Op::V3_MIN_F32: - return "v3_min_f32"; - case Vop3::Op::V3_MAX_F32: - return "v3_max_f32"; - case Vop3::Op::V3_MIN_I32: - return "v3_min_i32"; - case Vop3::Op::V3_MAX_I32: - return "v3_max_i32"; - case Vop3::Op::V3_MIN_U32: - return "v3_min_u32"; - case Vop3::Op::V3_MAX_U32: - return "v3_max_u32"; - case Vop3::Op::V3_LSHR_B32: - return "v3_lshr_b32"; - case Vop3::Op::V3_LSHRREV_B32: - return "v3_lshrrev_b32"; - case Vop3::Op::V3_ASHR_I32: - return "v3_ashr_i32"; - case Vop3::Op::V3_ASHRREV_I32: - return "v3_ashrrev_i32"; - case Vop3::Op::V3_LSHL_B32: - return "v3_lshl_b32"; - case Vop3::Op::V3_LSHLREV_B32: - return "v3_lshlrev_b32"; - case Vop3::Op::V3_AND_B32: - return "v3_and_b32"; - case Vop3::Op::V3_OR_B32: - return "v3_or_b32"; - case Vop3::Op::V3_XOR_B32: - return "v3_xor_b32"; - case Vop3::Op::V3_BFM_B32: - return "v3_bfm_b32"; - case Vop3::Op::V3_MAC_F32: - return "v3_mac_f32"; - case Vop3::Op::V3_MADMK_F32: - return "v3_madmk_f32"; - case Vop3::Op::V3_MADAK_F32: - return "v3_madak_f32"; - case Vop3::Op::V3_BCNT_U32_B32: - return "v3_bcnt_u32_b32"; - case Vop3::Op::V3_MBCNT_LO_U32_B32: - return "v3_mbcnt_lo_u32_b32"; - case Vop3::Op::V3_MBCNT_HI_U32_B32: - return "v3_mbcnt_hi_u32_b32"; - case Vop3::Op::V3_ADD_I32: - return "v3_add_i32"; - case Vop3::Op::V3_SUB_I32: - return "v3_sub_i32"; - case Vop3::Op::V3_SUBREV_I32: - return "v3_subrev_i32"; - case Vop3::Op::V3_ADDC_U32: - return "v3_addc_u32"; - case Vop3::Op::V3_SUBB_U32: - return "v3_subb_u32"; - case Vop3::Op::V3_SUBBREV_U32: - return "v3_subbrev_u32"; - case Vop3::Op::V3_LDEXP_F32: - return "v3_ldexp_f32"; - case Vop3::Op::V3_CVT_PKACCUM_U8_F32: - return "v3_cvt_pkaccum_u8_f32"; - case Vop3::Op::V3_CVT_PKNORM_I16_F32: - return "v3_cvt_pknorm_i16_f32"; - case Vop3::Op::V3_CVT_PKNORM_U16_F32: - return "v3_cvt_pknorm_u16_f32"; - case Vop3::Op::V3_CVT_PKRTZ_F16_F32: - return "v3_cvt_pkrtz_f16_f32"; - case Vop3::Op::V3_CVT_PK_U16_U32: - return "v3_cvt_pk_u16_u32"; - case Vop3::Op::V3_CVT_PK_I16_I32: - return "v3_cvt_pk_i16_i32"; - case Vop3::Op::V3_MAD_LEGACY_F32: - return "v3_mad_legacy_f32"; - case Vop3::Op::V3_MAD_F32: - return "v3_mad_f32"; - case Vop3::Op::V3_MAD_I32_I24: - return "v3_mad_i32_i24"; - case Vop3::Op::V3_MAD_U32_U24: - return "v3_mad_u32_u24"; - case Vop3::Op::V3_CUBEID_F32: - return "v3_cubeid_f32"; - case Vop3::Op::V3_CUBESC_F32: - return "v3_cubesc_f32"; - case Vop3::Op::V3_CUBETC_F32: - return "v3_cubetc_f32"; - case Vop3::Op::V3_CUBEMA_F32: - return "v3_cubema_f32"; - case Vop3::Op::V3_BFE_U32: - return "v3_bfe_u32"; - case Vop3::Op::V3_BFE_I32: - return "v3_bfe_i32"; - case Vop3::Op::V3_BFI_B32: - return "v3_bfi_b32"; - case Vop3::Op::V3_FMA_F32: - return "v3_fma_f32"; - case Vop3::Op::V3_FMA_F64: - return "v3_fma_f64"; - case Vop3::Op::V3_LERP_U8: - return "v3_lerp_u8"; - case Vop3::Op::V3_ALIGNBIT_B32: - return "v3_alignbit_b32"; - case Vop3::Op::V3_ALIGNBYTE_B32: - return "v3_alignbyte_b32"; - case Vop3::Op::V3_MULLIT_F32: - return "v3_mullit_f32"; - case Vop3::Op::V3_MIN3_F32: - return "v3_min3_f32"; - case Vop3::Op::V3_MIN3_I32: - return "v3_min3_i32"; - case Vop3::Op::V3_MIN3_U32: - return "v3_min3_u32"; - case Vop3::Op::V3_MAX3_F32: - return "v3_max3_f32"; - case Vop3::Op::V3_MAX3_I32: - return "v3_max3_i32"; - case Vop3::Op::V3_MAX3_U32: - return "v3_max3_u32"; - case Vop3::Op::V3_MED3_F32: - return "v3_med3_f32"; - case Vop3::Op::V3_MED3_I32: - return "v3_med3_i32"; - case Vop3::Op::V3_MED3_U32: - return "v3_med3_u32"; - case Vop3::Op::V3_SAD_U8: - return "v3_sad_u8"; - case Vop3::Op::V3_SAD_HI_U8: - return "v3_sad_hi_u8"; - case Vop3::Op::V3_SAD_U16: - return "v3_sad_u16"; - case Vop3::Op::V3_SAD_U32: - return "v3_sad_u32"; - case Vop3::Op::V3_CVT_PK_U8_F32: - return "v3_cvt_pk_u8_f32"; - case Vop3::Op::V3_DIV_FIXUP_F32: - return "v3_div_fixup_f32"; - case Vop3::Op::V3_DIV_FIXUP_F64: - return "v3_div_fixup_f64"; - case Vop3::Op::V3_LSHL_B64: - return "v3_lshl_b64"; - case Vop3::Op::V3_LSHR_B64: - return "v3_lshr_b64"; - case Vop3::Op::V3_ASHR_I64: - return "v3_ashr_i64"; - case Vop3::Op::V3_ADD_F64: - return "v3_add_f64"; - case Vop3::Op::V3_MUL_F64: - return "v3_mul_f64"; - case Vop3::Op::V3_MIN_F64: - return "v3_min_f64"; - case Vop3::Op::V3_MAX_F64: - return "v3_max_f64"; - case Vop3::Op::V3_LDEXP_F64: - return "v3_ldexp_f64"; - case Vop3::Op::V3_MUL_LO_U32: - return "v3_mul_lo_u32"; - case Vop3::Op::V3_MUL_HI_U32: - return "v3_mul_hi_u32"; - case Vop3::Op::V3_MUL_LO_I32: - return "v3_mul_lo_i32"; - case Vop3::Op::V3_MUL_HI_I32: - return "v3_mul_hi_i32"; - case Vop3::Op::V3_DIV_SCALE_F32: - return "v3_div_scale_f32"; - case Vop3::Op::V3_DIV_SCALE_F64: - return "v3_div_scale_f64"; - case Vop3::Op::V3_DIV_FMAS_F32: - return "v3_div_fmas_f32"; - case Vop3::Op::V3_DIV_FMAS_F64: - return "v3_div_fmas_f64"; - case Vop3::Op::V3_MSAD_U8: - return "v3_msad_u8"; - case Vop3::Op::V3_QSAD_U8: - return "v3_qsad_u8"; - case Vop3::Op::V3_MQSAD_U8: - return "v3_mqsad_u8"; - case Vop3::Op::V3_TRIG_PREOP_F64: - return "v3_trig_preop_f64"; - case Vop3::Op::V3_NOP: - return "v3_nop"; - case Vop3::Op::V3_MOV_B32: - return "v3_mov_b32"; - case Vop3::Op::V3_READFIRSTLANE_B32: - return "v3_readfirstlane_b32"; - case Vop3::Op::V3_CVT_I32_F64: - return "v3_cvt_i32_f64"; - case Vop3::Op::V3_CVT_F64_I32: - return "v3_cvt_f64_i32"; - case Vop3::Op::V3_CVT_F32_I32: - return "v3_cvt_f32_i32"; - case Vop3::Op::V3_CVT_F32_U32: - return "v3_cvt_f32_u32"; - case Vop3::Op::V3_CVT_U32_F32: - return "v3_cvt_u32_f32"; - case Vop3::Op::V3_CVT_I32_F32: - return "v3_cvt_i32_f32"; - case Vop3::Op::V3_MOV_FED_B32: - return "v3_mov_fed_b32"; - case Vop3::Op::V3_CVT_F16_F32: - return "v3_cvt_f16_f32"; - case Vop3::Op::V3_CVT_F32_F16: - return "v3_cvt_f32_f16"; - case Vop3::Op::V3_CVT_RPI_I32_F32: - return "v3_cvt_rpi_i32_f32"; - case Vop3::Op::V3_CVT_FLR_I32_F32: - return "v3_cvt_flr_i32_f32"; - case Vop3::Op::V3_CVT_OFF_F32_I4: - return "v3_cvt_off_f32_i4"; - case Vop3::Op::V3_CVT_F32_F64: - return "v3_cvt_f32_f64"; - case Vop3::Op::V3_CVT_F64_F32: - return "v3_cvt_f64_f32"; - case Vop3::Op::V3_CVT_F32_UBYTE0: - return "v3_cvt_f32_ubyte0"; - case Vop3::Op::V3_CVT_F32_UBYTE1: - return "v3_cvt_f32_ubyte1"; - case Vop3::Op::V3_CVT_F32_UBYTE2: - return "v3_cvt_f32_ubyte2"; - case Vop3::Op::V3_CVT_F32_UBYTE3: - return "v3_cvt_f32_ubyte3"; - case Vop3::Op::V3_CVT_U32_F64: - return "v3_cvt_u32_f64"; - case Vop3::Op::V3_CVT_F64_U32: - return "v3_cvt_f64_u32"; - case Vop3::Op::V3_FRACT_F32: - return "v3_fract_f32"; - case Vop3::Op::V3_TRUNC_F32: - return "v3_trunc_f32"; - case Vop3::Op::V3_CEIL_F32: - return "v3_ceil_f32"; - case Vop3::Op::V3_RNDNE_F32: - return "v3_rndne_f32"; - case Vop3::Op::V3_FLOOR_F32: - return "v3_floor_f32"; - case Vop3::Op::V3_EXP_F32: - return "v3_exp_f32"; - case Vop3::Op::V3_LOG_CLAMP_F32: - return "v3_log_clamp_f32"; - case Vop3::Op::V3_LOG_F32: - return "v3_log_f32"; - case Vop3::Op::V3_RCP_CLAMP_F32: - return "v3_rcp_clamp_f32"; - case Vop3::Op::V3_RCP_LEGACY_F32: - return "v3_rcp_legacy_f32"; - case Vop3::Op::V3_RCP_F32: - return "v3_rcp_f32"; - case Vop3::Op::V3_RCP_IFLAG_F32: - return "v3_rcp_iflag_f32"; - case Vop3::Op::V3_RSQ_CLAMP_F32: - return "v3_rsq_clamp_f32"; - case Vop3::Op::V3_RSQ_LEGACY_F32: - return "v3_rsq_legacy_f32"; - case Vop3::Op::V3_RSQ_F32: - return "v3_rsq_f32"; - case Vop3::Op::V3_RCP_F64: - return "v3_rcp_f64"; - case Vop3::Op::V3_RCP_CLAMP_F64: - return "v3_rcp_clamp_f64"; - case Vop3::Op::V3_RSQ_F64: - return "v3_rsq_f64"; - case Vop3::Op::V3_RSQ_CLAMP_F64: - return "v3_rsq_clamp_f64"; - case Vop3::Op::V3_SQRT_F32: - return "v3_sqrt_f32"; - case Vop3::Op::V3_SQRT_F64: - return "v3_sqrt_f64"; - case Vop3::Op::V3_SIN_F32: - return "v3_sin_f32"; - case Vop3::Op::V3_COS_F32: - return "v3_cos_f32"; - case Vop3::Op::V3_NOT_B32: - return "v3_not_b32"; - case Vop3::Op::V3_BFREV_B32: - return "v3_bfrev_b32"; - case Vop3::Op::V3_FFBH_U32: - return "v3_ffbh_u32"; - case Vop3::Op::V3_FFBL_B32: - return "v3_ffbl_b32"; - case Vop3::Op::V3_FFBH_I32: - return "v3_ffbh_i32"; - case Vop3::Op::V3_FREXP_EXP_I32_F64: - return "v3_frexp_exp_i32_f64"; - case Vop3::Op::V3_FREXP_MANT_F64: - return "v3_frexp_mant_f64"; - case Vop3::Op::V3_FRACT_F64: - return "v3_fract_f64"; - case Vop3::Op::V3_FREXP_EXP_I32_F32: - return "v3_frexp_exp_i32_f32"; - case Vop3::Op::V3_FREXP_MANT_F32: - return "v3_frexp_mant_f32"; - case Vop3::Op::V3_CLREXCP: - return "v3_clrexcp"; - case Vop3::Op::V3_MOVRELD_B32: - return "v3_movreld_b32"; - case Vop3::Op::V3_MOVRELS_B32: - return "v3_movrels_b32"; - case Vop3::Op::V3_MOVRELSD_B32: - return "v3_movrelsd_b32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::smrdOpcodeToString(Smrd::Op op) { - switch (op) { - case Smrd::Op::S_LOAD_DWORD: - return "s_load_dword"; - case Smrd::Op::S_LOAD_DWORDX2: - return "s_load_dwordx2"; - case Smrd::Op::S_LOAD_DWORDX4: - return "s_load_dwordx4"; - case Smrd::Op::S_LOAD_DWORDX8: - return "s_load_dwordx8"; - case Smrd::Op::S_LOAD_DWORDX16: - return "s_load_dwordx16"; - case Smrd::Op::S_BUFFER_LOAD_DWORD: - return "s_buffer_load_dword"; - case Smrd::Op::S_BUFFER_LOAD_DWORDX2: - return "s_buffer_load_dwordx2"; - case Smrd::Op::S_BUFFER_LOAD_DWORDX4: - return "s_buffer_load_dwordx4"; - case Smrd::Op::S_BUFFER_LOAD_DWORDX8: - return "s_buffer_load_dwordx8"; - case Smrd::Op::S_BUFFER_LOAD_DWORDX16: - return "s_buffer_load_dwordx16"; - case Smrd::Op::S_DCACHE_INV_VOL: - return "s_dcache_inv_vol"; - case Smrd::Op::S_MEMTIME: - return "s_memtime"; - case Smrd::Op::S_DCACHE_INV: - return "s_dcache_inv"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::mubufOpcodeToString(Mubuf::Op op) { - switch (op) { - case Mubuf::Op::BUFFER_LOAD_FORMAT_X: - return "buffer_load_format_x"; - case Mubuf::Op::BUFFER_LOAD_FORMAT_XY: - return "buffer_load_format_xy"; - case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZ: - return "buffer_load_format_xyz"; - case Mubuf::Op::BUFFER_LOAD_FORMAT_XYZW: - return "buffer_load_format_xyzw"; - case Mubuf::Op::BUFFER_STORE_FORMAT_X: - return "buffer_store_format_x"; - case Mubuf::Op::BUFFER_STORE_FORMAT_XY: - return "buffer_store_format_xy"; - case Mubuf::Op::BUFFER_STORE_FORMAT_XYZ: - return "buffer_store_format_xyz"; - case Mubuf::Op::BUFFER_STORE_FORMAT_XYZW: - return "buffer_store_format_xyzw"; - case Mubuf::Op::BUFFER_LOAD_UBYTE: - return "buffer_load_ubyte"; - case Mubuf::Op::BUFFER_LOAD_SBYTE: - return "buffer_load_sbyte"; - case Mubuf::Op::BUFFER_LOAD_USHORT: - return "buffer_load_ushort"; - case Mubuf::Op::BUFFER_LOAD_SSHORT: - return "buffer_load_sshort"; - case Mubuf::Op::BUFFER_LOAD_DWORD: - return "buffer_load_dword"; - case Mubuf::Op::BUFFER_LOAD_DWORDX2: - return "buffer_load_dwordx2"; - case Mubuf::Op::BUFFER_LOAD_DWORDX4: - return "buffer_load_dwordx4"; - case Mubuf::Op::BUFFER_LOAD_DWORDX3: - return "buffer_load_dwordx3"; - case Mubuf::Op::BUFFER_STORE_BYTE: - return "buffer_store_byte"; - case Mubuf::Op::BUFFER_STORE_SHORT: - return "buffer_store_short"; - case Mubuf::Op::BUFFER_STORE_DWORD: - return "buffer_store_dword"; - case Mubuf::Op::BUFFER_STORE_DWORDX2: - return "buffer_store_dwordx2"; - case Mubuf::Op::BUFFER_STORE_DWORDX4: - return "buffer_store_dwordx4"; - case Mubuf::Op::BUFFER_STORE_DWORDX3: - return "buffer_store_dwordx3"; - case Mubuf::Op::BUFFER_ATOMIC_SWAP: - return "buffer_atomic_swap"; - case Mubuf::Op::BUFFER_ATOMIC_CMPSWAP: - return "buffer_atomic_cmpswap"; - case Mubuf::Op::BUFFER_ATOMIC_ADD: - return "buffer_atomic_add"; - case Mubuf::Op::BUFFER_ATOMIC_SUB: - return "buffer_atomic_sub"; - case Mubuf::Op::BUFFER_ATOMIC_RSUB: - return "buffer_atomic_rsub"; - case Mubuf::Op::BUFFER_ATOMIC_SMIN: - return "buffer_atomic_smin"; - case Mubuf::Op::BUFFER_ATOMIC_UMIN: - return "buffer_atomic_umin"; - case Mubuf::Op::BUFFER_ATOMIC_SMAX: - return "buffer_atomic_smax"; - case Mubuf::Op::BUFFER_ATOMIC_UMAX: - return "buffer_atomic_umax"; - case Mubuf::Op::BUFFER_ATOMIC_AND: - return "buffer_atomic_and"; - case Mubuf::Op::BUFFER_ATOMIC_OR: - return "buffer_atomic_or"; - case Mubuf::Op::BUFFER_ATOMIC_XOR: - return "buffer_atomic_xor"; - case Mubuf::Op::BUFFER_ATOMIC_INC: - return "buffer_atomic_inc"; - case Mubuf::Op::BUFFER_ATOMIC_DEC: - return "buffer_atomic_dec"; - case Mubuf::Op::BUFFER_ATOMIC_FCMPSWAP: - return "buffer_atomic_fcmpswap"; - case Mubuf::Op::BUFFER_ATOMIC_FMIN: - return "buffer_atomic_fmin"; - case Mubuf::Op::BUFFER_ATOMIC_FMAX: - return "buffer_atomic_fmax"; - case Mubuf::Op::BUFFER_ATOMIC_SWAP_X2: - return "buffer_atomic_swap_x2"; - case Mubuf::Op::BUFFER_ATOMIC_CMPSWAP_X2: - return "buffer_atomic_cmpswap_x2"; - case Mubuf::Op::BUFFER_ATOMIC_ADD_X2: - return "buffer_atomic_add_x2"; - case Mubuf::Op::BUFFER_ATOMIC_SUB_X2: - return "buffer_atomic_sub_x2"; - case Mubuf::Op::BUFFER_ATOMIC_RSUB_X2: - return "buffer_atomic_rsub_x2"; - case Mubuf::Op::BUFFER_ATOMIC_SMIN_X2: - return "buffer_atomic_smin_x2"; - case Mubuf::Op::BUFFER_ATOMIC_UMIN_X2: - return "buffer_atomic_umin_x2"; - case Mubuf::Op::BUFFER_ATOMIC_SMAX_X2: - return "buffer_atomic_smax_x2"; - case Mubuf::Op::BUFFER_ATOMIC_UMAX_X2: - return "buffer_atomic_umax_x2"; - case Mubuf::Op::BUFFER_ATOMIC_AND_X2: - return "buffer_atomic_and_x2"; - case Mubuf::Op::BUFFER_ATOMIC_OR_X2: - return "buffer_atomic_or_x2"; - case Mubuf::Op::BUFFER_ATOMIC_XOR_X2: - return "buffer_atomic_xor_x2"; - case Mubuf::Op::BUFFER_ATOMIC_INC_X2: - return "buffer_atomic_inc_x2"; - case Mubuf::Op::BUFFER_ATOMIC_DEC_X2: - return "buffer_atomic_dec_x2"; - case Mubuf::Op::BUFFER_ATOMIC_FCMPSWAP_X2: - return "buffer_atomic_fcmpswap_x2"; - case Mubuf::Op::BUFFER_ATOMIC_FMIN_X2: - return "buffer_atomic_fmin_x2"; - case Mubuf::Op::BUFFER_ATOMIC_FMAX_X2: - return "buffer_atomic_fmax_x2"; - case Mubuf::Op::BUFFER_WBINVL1_SC_VOL: - return "buffer_wbinvl1_sc/vol"; - case Mubuf::Op::BUFFER_WBINVL1: - return "buffer_wbinvl1"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::mtbufOpcodeToString(Mtbuf::Op op) { - switch (op) { - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_X: - return "tbuffer_load_format_x"; - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XY: - return "tbuffer_load_format_xy"; - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZ: - return "tbuffer_load_format_xyz"; - case Mtbuf::Op::TBUFFER_LOAD_FORMAT_XYZW: - return "tbuffer_load_format_xyzw"; - case Mtbuf::Op::TBUFFER_STORE_FORMAT_X: - return "tbuffer_store_format_x"; - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XY: - return "tbuffer_store_format_xy"; - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZ: - return "tbuffer_store_format_xyz"; - case Mtbuf::Op::TBUFFER_STORE_FORMAT_XYZW: - return "tbuffer_store_format_xyzw"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::mimgOpcodeToString(Mimg::Op op) { - switch (op) { - case Mimg::Op::IMAGE_LOAD: - return "image_load"; - case Mimg::Op::IMAGE_LOAD_MIP: - return "image_load_mip"; - case Mimg::Op::IMAGE_LOAD_PCK: - return "image_load_pck"; - case Mimg::Op::IMAGE_LOAD_PCK_SGN: - return "image_load_pck_sgn"; - case Mimg::Op::IMAGE_LOAD_MIP_PCK: - return "image_load_mip_pck"; - case Mimg::Op::IMAGE_LOAD_MIP_PCK_SGN: - return "image_load_mip_pck_sgn"; - case Mimg::Op::IMAGE_STORE: - return "image_store"; - case Mimg::Op::IMAGE_STORE_MIP: - return "image_store_mip"; - case Mimg::Op::IMAGE_STORE_PCK: - return "image_store_pck"; - case Mimg::Op::IMAGE_STORE_MIP_PCK: - return "image_store_mip_pck"; - case Mimg::Op::IMAGE_GET_RESINFO: - return "image_get_resinfo"; - case Mimg::Op::IMAGE_ATOMIC_SWAP: - return "image_atomic_swap"; - case Mimg::Op::IMAGE_ATOMIC_CMPSWAP: - return "image_atomic_cmpswap"; - case Mimg::Op::IMAGE_ATOMIC_ADD: - return "image_atomic_add"; - case Mimg::Op::IMAGE_ATOMIC_SUB: - return "image_atomic_sub"; - case Mimg::Op::IMAGE_ATOMIC_RSUB: - return "image_atomic_rsub"; - case Mimg::Op::IMAGE_ATOMIC_SMIN: - return "image_atomic_smin"; - case Mimg::Op::IMAGE_ATOMIC_UMIN: - return "image_atomic_umin"; - case Mimg::Op::IMAGE_ATOMIC_SMAX: - return "image_atomic_smax"; - case Mimg::Op::IMAGE_ATOMIC_UMAX: - return "image_atomic_umax"; - case Mimg::Op::IMAGE_ATOMIC_AND: - return "image_atomic_and"; - case Mimg::Op::IMAGE_ATOMIC_OR: - return "image_atomic_or"; - case Mimg::Op::IMAGE_ATOMIC_XOR: - return "image_atomic_xor"; - case Mimg::Op::IMAGE_ATOMIC_INC: - return "image_atomic_inc"; - case Mimg::Op::IMAGE_ATOMIC_DEC: - return "image_atomic_dec"; - case Mimg::Op::IMAGE_ATOMIC_FCMPSWAP: - return "image_atomic_fcmpswap"; - case Mimg::Op::IMAGE_ATOMIC_FMIN: - return "image_atomic_fmin"; - case Mimg::Op::IMAGE_ATOMIC_FMAX: - return "image_atomic_fmax"; - case Mimg::Op::IMAGE_SAMPLE: - return "image_sample"; - case Mimg::Op::IMAGE_SAMPLE_CL: - return "image_sample_cl"; - case Mimg::Op::IMAGE_SAMPLE_D: - return "image_sample_d"; - case Mimg::Op::IMAGE_SAMPLE_D_CL: - return "image_sample_d_cl"; - case Mimg::Op::IMAGE_SAMPLE_L: - return "image_sample_l"; - case Mimg::Op::IMAGE_SAMPLE_B: - return "image_sample_b"; - case Mimg::Op::IMAGE_SAMPLE_B_CL: - return "image_sample_b_cl"; - case Mimg::Op::IMAGE_SAMPLE_LZ: - return "image_sample_lz"; - case Mimg::Op::IMAGE_SAMPLE_C: - return "image_sample_c"; - case Mimg::Op::IMAGE_SAMPLE_C_CL: - return "image_sample_c_cl"; - case Mimg::Op::IMAGE_SAMPLE_C_D: - return "image_sample_c_d"; - case Mimg::Op::IMAGE_SAMPLE_C_D_CL: - return "image_sample_c_d_cl"; - case Mimg::Op::IMAGE_SAMPLE_C_L: - return "image_sample_c_l"; - case Mimg::Op::IMAGE_SAMPLE_C_B: - return "image_sample_c_b"; - case Mimg::Op::IMAGE_SAMPLE_C_B_CL: - return "image_sample_c_b_cl"; - case Mimg::Op::IMAGE_SAMPLE_C_LZ: - return "image_sample_c_lz"; - case Mimg::Op::IMAGE_SAMPLE_O: - return "image_sample_o"; - case Mimg::Op::IMAGE_SAMPLE_CL_O: - return "image_sample_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_D_O: - return "image_sample_d_o"; - case Mimg::Op::IMAGE_SAMPLE_D_CL_O: - return "image_sample_d_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_L_O: - return "image_sample_l_o"; - case Mimg::Op::IMAGE_SAMPLE_B_O: - return "image_sample_b_o"; - case Mimg::Op::IMAGE_SAMPLE_B_CL_O: - return "image_sample_b_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_LZ_O: - return "image_sample_lz_o"; - case Mimg::Op::IMAGE_SAMPLE_C_O: - return "image_sample_c_o"; - case Mimg::Op::IMAGE_SAMPLE_C_CL_O: - return "image_sample_c_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_C_D_O: - return "image_sample_c_d_o"; - case Mimg::Op::IMAGE_SAMPLE_C_D_CL_O: - return "image_sample_c_d_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_C_L_O: - return "image_sample_c_l_o"; - case Mimg::Op::IMAGE_SAMPLE_C_B_O: - return "image_sample_c_b_o"; - case Mimg::Op::IMAGE_SAMPLE_C_B_CL_O: - return "image_sample_c_b_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_C_LZ_O: - return "image_sample_c_lz_o"; - case Mimg::Op::IMAGE_GATHER4: - return "image_gather4"; - case Mimg::Op::IMAGE_GATHER4_CL: - return "image_gather4_cl"; - case Mimg::Op::IMAGE_GATHER4_L: - return "image_gather4_l"; - case Mimg::Op::IMAGE_GATHER4_B: - return "image_gather4_b"; - case Mimg::Op::IMAGE_GATHER4_B_CL: - return "image_gather4_b_cl"; - case Mimg::Op::IMAGE_GATHER4_LZ: - return "image_gather4_lz"; - case Mimg::Op::IMAGE_GATHER4_C: - return "image_gather4_c"; - case Mimg::Op::IMAGE_GATHER4_C_CL: - return "image_gather4_c_cl"; - case Mimg::Op::IMAGE_GATHER4_C_L: - return "image_gather4_c_l"; - case Mimg::Op::IMAGE_GATHER4_C_B: - return "image_gather4_c_b"; - case Mimg::Op::IMAGE_GATHER4_C_B_CL: - return "image_gather4_c_b_cl"; - case Mimg::Op::IMAGE_GATHER4_C_LZ: - return "image_gather4_c_lz"; - case Mimg::Op::IMAGE_GATHER4_O: - return "image_gather4_o"; - case Mimg::Op::IMAGE_GATHER4_CL_O: - return "image_gather4_cl_o"; - case Mimg::Op::IMAGE_GATHER4_L_O: - return "image_gather4_l_o"; - case Mimg::Op::IMAGE_GATHER4_B_O: - return "image_gather4_b_o"; - case Mimg::Op::IMAGE_GATHER4_B_CL_O: - return "image_gather4_b_cl_o"; - case Mimg::Op::IMAGE_GATHER4_LZ_O: - return "image_gather4_lz_o"; - case Mimg::Op::IMAGE_GATHER4_C_O: - return "image_gather4_c_o"; - case Mimg::Op::IMAGE_GATHER4_C_CL_O: - return "image_gather4_c_cl_o"; - case Mimg::Op::IMAGE_GATHER4_C_L_O: - return "image_gather4_c_l_o"; - case Mimg::Op::IMAGE_GATHER4_C_B_O: - return "image_gather4_c_b_o"; - case Mimg::Op::IMAGE_GATHER4_C_B_CL_O: - return "image_gather4_c_b_cl_o"; - case Mimg::Op::IMAGE_GATHER4_C_LZ_O: - return "image_gather4_c_lz_o"; - case Mimg::Op::IMAGE_GET_LOD: - return "image_get_lod"; - case Mimg::Op::IMAGE_SAMPLE_CD: - return "image_sample_cd"; - case Mimg::Op::IMAGE_SAMPLE_CD_CL: - return "image_sample_cd_cl"; - case Mimg::Op::IMAGE_SAMPLE_C_CD: - return "image_sample_c_cd"; - case Mimg::Op::IMAGE_SAMPLE_C_CD_CL: - return "image_sample_c_cd_cl"; - case Mimg::Op::IMAGE_SAMPLE_CD_O: - return "image_sample_cd_o"; - case Mimg::Op::IMAGE_SAMPLE_CD_CL_O: - return "image_sample_cd_cl_o"; - case Mimg::Op::IMAGE_SAMPLE_C_CD_O: - return "image_sample_c_cd_o"; - case Mimg::Op::IMAGE_SAMPLE_C_CD_CL_O: - return "image_sample_c_cd_cl_o"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::dsOpcodeToString(Ds::Op op) { - switch (op) { - case Ds::Op::DS_ADD_U32: - return "ds_add_u32"; - case Ds::Op::DS_SUB_U32: - return "ds_sub_u32"; - case Ds::Op::DS_RSUB_U32: - return "ds_rsub_u32"; - case Ds::Op::DS_INC_U32: - return "ds_inc_u32"; - case Ds::Op::DS_DEC_U32: - return "ds_dec_u32"; - case Ds::Op::DS_MIN_I32: - return "ds_min_i32"; - case Ds::Op::DS_MAX_I32: - return "ds_max_i32"; - case Ds::Op::DS_MIN_U32: - return "ds_min_u32"; - case Ds::Op::DS_MAX_U32: - return "ds_max_u32"; - case Ds::Op::DS_AND_B32: - return "ds_and_b32"; - case Ds::Op::DS_OR_B32: - return "ds_or_b32"; - case Ds::Op::DS_XOR_B32: - return "ds_xor_b32"; - case Ds::Op::DS_MSKOR_B32: - return "ds_mskor_b32"; - case Ds::Op::DS_WRITE_B32: - return "ds_write_b32"; - case Ds::Op::DS_WRITE2_B32: - return "ds_write2_b32"; - case Ds::Op::DS_WRITE2ST64_B32: - return "ds_write2st64_b32"; - case Ds::Op::DS_CMPST_B32: - return "ds_cmpst_b32"; - case Ds::Op::DS_CMPST_F32: - return "ds_cmpst_f32"; - case Ds::Op::DS_MIN_F32: - return "ds_min_f32"; - case Ds::Op::DS_MAX_F32: - return "ds_max_f32"; - case Ds::Op::DS_NOP: - return "ds_nop"; - case Ds::Op::DS_GWS_SEMA_RELEASE_ALL: - return "ds_gws_sema_release_all"; - case Ds::Op::DS_GWS_INIT: - return "ds_gws_init"; - case Ds::Op::DS_GWS_SEMA_V: - return "ds_gws_sema_v"; - case Ds::Op::DS_GWS_SEMA_BR: - return "ds_gws_sema_br"; - case Ds::Op::DS_GWS_SEMA_P: - return "ds_gws_sema_p"; - case Ds::Op::DS_GWS_BARRIER: - return "ds_gws_barrier"; - case Ds::Op::DS_WRITE_B8: - return "ds_write_b8"; - case Ds::Op::DS_WRITE_B16: - return "ds_write_b16"; - case Ds::Op::DS_ADD_RTN_U32: - return "ds_add_rtn_u32"; - case Ds::Op::DS_SUB_RTN_U32: - return "ds_sub_rtn_u32"; - case Ds::Op::DS_RSUB_RTN_U32: - return "ds_rsub_rtn_u32"; - case Ds::Op::DS_INC_RTN_U32: - return "ds_inc_rtn_u32"; - case Ds::Op::DS_DEC_RTN_U32: - return "ds_dec_rtn_u32"; - case Ds::Op::DS_MIN_RTN_I32: - return "ds_min_rtn_i32"; - case Ds::Op::DS_MAX_RTN_I32: - return "ds_max_rtn_i32"; - case Ds::Op::DS_MIN_RTN_U32: - return "ds_min_rtn_u32"; - case Ds::Op::DS_MAX_RTN_U32: - return "ds_max_rtn_u32"; - case Ds::Op::DS_AND_RTN_B32: - return "ds_and_rtn_b32"; - case Ds::Op::DS_OR_RTN_B32: - return "ds_or_rtn_b32"; - case Ds::Op::DS_XOR_RTN_B32: - return "ds_xor_rtn_b32"; - case Ds::Op::DS_MSKOR_RTN_B32: - return "ds_mskor_rtn_b32"; - case Ds::Op::DS_WRXCHG_RTN_B32: - return "ds_wrxchg_rtn_b32"; - case Ds::Op::DS_WRXCHG2_RTN_B32: - return "ds_wrxchg2_rtn_b32"; - case Ds::Op::DS_WRXCHG2ST64_RTN_B32: - return "ds_wrxchg2st64_rtn_b32"; - case Ds::Op::DS_CMPST_RTN_B32: - return "ds_cmpst_rtn_b32"; - case Ds::Op::DS_CMPST_RTN_F32: - return "ds_cmpst_rtn_f32"; - case Ds::Op::DS_MIN_RTN_F32: - return "ds_min_rtn_f32"; - case Ds::Op::DS_MAX_RTN_F32: - return "ds_max_rtn_f32"; - case Ds::Op::DS_WRAP_RTN_B32: - return "ds_wrap_rtn_b32"; - case Ds::Op::DS_SWIZZLE_B32: - return "ds_swizzle_b32"; - case Ds::Op::DS_READ_B32: - return "ds_read_b32"; - case Ds::Op::DS_READ2_B32: - return "ds_read2_b32"; - case Ds::Op::DS_READ2ST64_B32: - return "ds_read2st64_b32"; - case Ds::Op::DS_READ_I8: - return "ds_read_i8"; - case Ds::Op::DS_READ_U8: - return "ds_read_u8"; - case Ds::Op::DS_READ_I16: - return "ds_read_i16"; - case Ds::Op::DS_READ_U16: - return "ds_read_u16"; - case Ds::Op::DS_CONSUME: - return "ds_consume"; - case Ds::Op::DS_APPEND: - return "ds_append"; - case Ds::Op::DS_ORDERED_COUNT: - return "ds_ordered_count"; - case Ds::Op::DS_ADD_U64: - return "ds_add_u64"; - case Ds::Op::DS_SUB_U64: - return "ds_sub_u64"; - case Ds::Op::DS_RSUB_U64: - return "ds_rsub_u64"; - case Ds::Op::DS_INC_U64: - return "ds_inc_u64"; - case Ds::Op::DS_DEC_U64: - return "ds_dec_u64"; - case Ds::Op::DS_MIN_I64: - return "ds_min_i64"; - case Ds::Op::DS_MAX_I64: - return "ds_max_i64"; - case Ds::Op::DS_MIN_U64: - return "ds_min_u64"; - case Ds::Op::DS_MAX_U64: - return "ds_max_u64"; - case Ds::Op::DS_AND_B64: - return "ds_and_b64"; - case Ds::Op::DS_OR_B64: - return "ds_or_b64"; - case Ds::Op::DS_XOR_B64: - return "ds_xor_b64"; - case Ds::Op::DS_MSKOR_B64: - return "ds_mskor_b64"; - case Ds::Op::DS_WRITE_B64: - return "ds_write_b64"; - case Ds::Op::DS_WRITE2_B64: - return "ds_write2_b64"; - case Ds::Op::DS_WRITE2ST64_B64: - return "ds_write2st64_b64"; - case Ds::Op::DS_CMPST_B64: - return "ds_cmpst_b64"; - case Ds::Op::DS_CMPST_F64: - return "ds_cmpst_f64"; - case Ds::Op::DS_MIN_F64: - return "ds_min_f64"; - case Ds::Op::DS_MAX_F64: - return "ds_max_f64"; - case Ds::Op::DS_ADD_RTN_U64: - return "ds_add_rtn_u64"; - case Ds::Op::DS_SUB_RTN_U64: - return "ds_sub_rtn_u64"; - case Ds::Op::DS_RSUB_RTN_U64: - return "ds_rsub_rtn_u64"; - case Ds::Op::DS_INC_RTN_U64: - return "ds_inc_rtn_u64"; - case Ds::Op::DS_DEC_RTN_U64: - return "ds_dec_rtn_u64"; - case Ds::Op::DS_MIN_RTN_I64: - return "ds_min_rtn_i64"; - case Ds::Op::DS_MAX_RTN_I64: - return "ds_max_rtn_i64"; - case Ds::Op::DS_MIN_RTN_U64: - return "ds_min_rtn_u64"; - case Ds::Op::DS_MAX_RTN_U64: - return "ds_max_rtn_u64"; - case Ds::Op::DS_AND_RTN_B64: - return "ds_and_rtn_b64"; - case Ds::Op::DS_OR_RTN_B64: - return "ds_or_rtn_b64"; - case Ds::Op::DS_XOR_RTN_B64: - return "ds_xor_rtn_b64"; - case Ds::Op::DS_MSKOR_RTN_B64: - return "ds_mskor_rtn_b64"; - case Ds::Op::DS_WRXCHG_RTN_B64: - return "ds_wrxchg_rtn_b64"; - case Ds::Op::DS_WRXCHG2_RTN_B64: - return "ds_wrxchg2_rtn_b64"; - case Ds::Op::DS_WRXCHG2ST64_RTN_B64: - return "ds_wrxchg2st64_rtn_b64"; - case Ds::Op::DS_CMPST_RTN_B64: - return "ds_cmpst_rtn_b64"; - case Ds::Op::DS_CMPST_RTN_F64: - return "ds_cmpst_rtn_f64"; - case Ds::Op::DS_MIN_RTN_F64: - return "ds_min_rtn_f64"; - case Ds::Op::DS_MAX_RTN_F64: - return "ds_max_rtn_f64"; - case Ds::Op::DS_READ_B64: - return "ds_read_b64"; - case Ds::Op::DS_READ2_B64: - return "ds_read2_b64"; - case Ds::Op::DS_READ2ST64_B64: - return "ds_read2st64_b64"; - case Ds::Op::DS_CONDXCHG32_RTN_B64: - return "ds_condxchg32_rtn_b64"; - case Ds::Op::DS_ADD_SRC2_U32: - return "ds_add_src2_u32"; - case Ds::Op::DS_SUB_SRC2_U32: - return "ds_sub_src2_u32"; - case Ds::Op::DS_RSUB_SRC2_U32: - return "ds_rsub_src2_u32"; - case Ds::Op::DS_INC_SRC2_U32: - return "ds_inc_src2_u32"; - case Ds::Op::DS_DEC_SRC2_U32: - return "ds_dec_src2_u32"; - case Ds::Op::DS_MIN_SRC2_I32: - return "ds_min_src2_i32"; - case Ds::Op::DS_MAX_SRC2_I32: - return "ds_max_src2_i32"; - case Ds::Op::DS_MIN_SRC2_U32: - return "ds_min_src2_u32"; - case Ds::Op::DS_MAX_SRC2_U32: - return "ds_max_src2_u32"; - case Ds::Op::DS_AND_SRC2_B32: - return "ds_and_src2_b32"; - case Ds::Op::DS_OR_SRC2_B32: - return "ds_or_src2_b32"; - case Ds::Op::DS_XOR_SRC2_B32: - return "ds_xor_src2_b32"; - case Ds::Op::DS_WRITE_SRC2_B32: - return "ds_write_src2_b32"; - case Ds::Op::DS_MIN_SRC2_F32: - return "ds_min_src2_f32"; - case Ds::Op::DS_MAX_SRC2_F32: - return "ds_max_src2_f32"; - case Ds::Op::DS_ADD_SRC2_U64: - return "ds_add_src2_u64"; - case Ds::Op::DS_SUB_SRC2_U64: - return "ds_sub_src2_u64"; - case Ds::Op::DS_RSUB_SRC2_U64: - return "ds_rsub_src2_u64"; - case Ds::Op::DS_INC_SRC2_U64: - return "ds_inc_src2_u64"; - case Ds::Op::DS_DEC_SRC2_U64: - return "ds_dec_src2_u64"; - case Ds::Op::DS_MIN_SRC2_I64: - return "ds_min_src2_i64"; - case Ds::Op::DS_MAX_SRC2_I64: - return "ds_max_src2_i64"; - case Ds::Op::DS_MIN_SRC2_U64: - return "ds_min_src2_u64"; - case Ds::Op::DS_MAX_SRC2_U64: - return "ds_max_src2_u64"; - case Ds::Op::DS_AND_SRC2_B64: - return "ds_and_src2_b64"; - case Ds::Op::DS_OR_SRC2_B64: - return "ds_or_src2_b64"; - case Ds::Op::DS_XOR_SRC2_B64: - return "ds_xor_src2_b64"; - case Ds::Op::DS_WRITE_SRC2_B64: - return "ds_write_src2_b64"; - case Ds::Op::DS_MIN_SRC2_F64: - return "ds_min_src2_f64"; - case Ds::Op::DS_MAX_SRC2_F64: - return "ds_max_src2_f64"; - case Ds::Op::DS_WRITE_B96: - return "ds_write_b96"; - case Ds::Op::DS_WRITE_B128: - return "ds_write_b128"; - case Ds::Op::DS_CONDXCHG32_RTN_B128: - return "ds_condxchg32_rtn_b128"; - case Ds::Op::DS_READ_B96: - return "ds_read_b96"; - case Ds::Op::DS_READ_B128: - return "ds_read_b128"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::vintrpOpcodeToString(Vintrp::Op op) { - switch (op) { - case Vintrp::Op::V_INTERP_P1_F32: - return "v_interp_p1_f32"; - case Vintrp::Op::V_INTERP_P2_F32: - return "v_interp_p2_f32"; - case Vintrp::Op::V_INTERP_MOV_F32: - return "v_interp_mov_f32"; - default: - return nullptr; - } -} - -const char *amdgpu::shader::opcodeToString(InstructionClass instClass, int op) { - switch (instClass) { - case InstructionClass::Vop2: - return vop2OpcodeToString(static_cast(op)); - case InstructionClass::Sop2: - return sop2OpcodeToString(static_cast(op)); - case InstructionClass::Sopk: - return sopkOpcodeToString(static_cast(op)); - case InstructionClass::Smrd: - return smrdOpcodeToString(static_cast(op)); - case InstructionClass::Vop3: - return vop3OpcodeToString(static_cast(op)); - case InstructionClass::Mubuf: - return mubufOpcodeToString(static_cast(op)); - case InstructionClass::Mtbuf: - return mtbufOpcodeToString(static_cast(op)); - case InstructionClass::Mimg: - return mimgOpcodeToString(static_cast(op)); - case InstructionClass::Ds: - return dsOpcodeToString(static_cast(op)); - case InstructionClass::Vintrp: - return vintrpOpcodeToString(static_cast(op)); - case InstructionClass::Exp: - return nullptr; - case InstructionClass::Vop1: - return vop1OpcodeToString(static_cast(op)); - case InstructionClass::Vopc: - return vopcOpcodeToString(static_cast(op)); - case InstructionClass::Sop1: - return sop1OpcodeToString(static_cast(op)); - case InstructionClass::Sopc: - return sopcOpcodeToString(static_cast(op)); - case InstructionClass::Sopp: - return soppOpcodeToString(static_cast(op)); - - default: - return nullptr; - } -} - -void amdgpu::shader::Sop1::dump() const { - int instSize = kMinInstSize; - printSop1Opcode(op); - std::printf(" "); - instSize += printScalarOperand(sdst, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(ssrc0, inst + instSize); -} - -void amdgpu::shader::Sopk::dump() const { - int instSize = kMinInstSize; - printSopkOpcode(op); - std::printf(" "); - instSize += printScalarOperand(sdst, inst + instSize); - std::printf(", %d", simm); -} - -void amdgpu::shader::Sopc::dump() const { - int instSize = kMinInstSize; - printSopcOpcode(op); - std::printf(" "); - instSize += printScalarOperand(ssrc0, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(ssrc1, inst + instSize); -} - -void amdgpu::shader::Sop2::dump() const { - int instSize = kMinInstSize; - printSop2Opcode(op); - std::printf(" "); - instSize += printScalarOperand(sdst, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(ssrc0, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(ssrc1, inst + instSize); -} - -void amdgpu::shader::Sopp::dump() const { - int instSize = kMinInstSize; - printSoppOpcode(op); - std::printf(" "); - instSize += printScalarOperand(simm, inst + instSize); -} - -void amdgpu::shader::Vop1::dump() const { - int instSize = kMinInstSize; - printVop1Opcode(op); - std::printf(" "); - instSize += printVectorOperand(vdst, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(src0, inst + instSize); -} - -void amdgpu::shader::Vop2::dump() const { - int instSize = kMinInstSize; - printVop2Opcode(op); - std::printf(" "); - instSize += printVectorOperand(vdst, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(src0, inst + instSize); - std::printf(", "); - instSize += printVectorOperand(vsrc1, inst + instSize); - - if (op == Vop2::Op::V_MADMK_F32 || op == Vop2::Op::V_MADAK_F32) { - std::printf(", "); - instSize += printScalarOperand(255, inst + instSize); - } -} - -void amdgpu::shader::Vop3::dump() const { - - /* - v_add_i32 - v_addc_u32 - v_sub_i32 - v_subb_u32, - v_subbrev_u32 - v_subrev_i32 - v_div_scale_f32 - v_div_scale_f64 - */ - - int instSize = kMinInstSize; - printVop3Opcode(op); - std::printf(" "); - instSize += printVectorOperand(vdst, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(src0, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(src1, inst + instSize); - std::printf(", "); - instSize += printScalarOperand(src2, inst + instSize); - - std::printf(" #abs=%x, clmp=%x, neg=%x, omod=%x, ", abs, clmp, neg, omod); - instSize += printScalarOperand(sdst, inst + instSize); -} - -void amdgpu::shader::Vopc::dump() const { - int instSize = kMinInstSize; - - printVopcOpcode(op); - std::printf(" "); - instSize += printScalarOperand(src0, inst + instSize); - std::printf(", "); - instSize += printVectorOperand(vsrc1, inst + instSize); -} - -void amdgpu::shader::Smrd::dump() const { - int instSize = kMinInstSize; - - printSmrdOpcode(op); - printf(" "); - printScalarOperand(sdst, inst + instSize); - printf(", "); - printScalarOperand(sbase << 1, inst + instSize); - printf(", "); - - if (imm) { - printf("%u", offset << 2); - } else { - printScalarOperand(offset, inst + instSize); - } - - std::printf(" #sdst=%x,sbase=%x,imm=%x,offset=%x", sdst, sbase, imm, offset); -} -void amdgpu::shader::Mubuf::dump() const { - int instSize = kMinInstSize; - - printMubufOpcode(op); - printf(" "); - printVectorOperand(vdata, inst + instSize); - printf(", "); - printVectorOperand(vaddr, inst + instSize); - printf(", "); - printScalarOperand(srsrc << 2, inst + instSize); - printf(", "); - printScalarOperand(soffset, inst + instSize); - printf(" #offset=%x, " - "offen=%x,idxen=%x,glc=%x,lds=%x,vaddr=%x,vdata=%x,srsrc=%x,slc=%x," - "tfe=%x,soffset=%d", - offset, offen, idxen, glc, lds, vaddr, vdata, srsrc, slc, tfe, - soffset); -} -void amdgpu::shader::Mtbuf::dump() const { - int instSize = kMinInstSize; - - printMtbufOpcode(op); - printf(" "); - printVectorOperand(vdata, inst + instSize); - printf(", "); - printScalarOperand(srsrc << 2, inst + instSize); - printf(", "); - printScalarOperand(soffset, inst + instSize); - printf(" #offset=%x,offen=%x,idxen=%x,glc=%x,op=%x,dfmt=%x,nfmt=%x,vaddr=%x," - "vdata=%x,srsrc=%x,slc=%x,tfe=%x,soffset=%x", - offset, offen, idxen, glc, (unsigned)op, dfmt, nfmt, vaddr, vdata, - srsrc, slc, tfe, soffset); -} -void amdgpu::shader::Mimg::dump() const { - int instSize = kMinInstSize; - - printMimgOpcode(op); - - printf(" #dmask=%x,unrm=%x,glc=%x,da=%x,r128=%x,tfe=%x,lwe=%x,slc=%x," - "vaddr=%x,vdata=%x,srsrc=%x,ssamp=%x", - dmask, unrm, glc, da, r128, tfe, lwe, slc, vaddr, vdata, srsrc, ssamp); -} - -void amdgpu::shader::Ds::dump() const { - int instSize = kMinInstSize; - - printDsOpcode(op); -} - -void amdgpu::shader::Vintrp::dump() const { - int instSize = kMinInstSize; - - printVintrpOpcode(op); - printf(" "); - instSize += printVectorOperand(vdst, inst + instSize); - printf(", "); - instSize += printVectorOperand(vsrc, inst + instSize); - const char channels[] = {'x', 'y', 'z', 'w'}; - - printf(", attr%d.%c", attr, channels[attrChan]); -} -void amdgpu::shader::Exp::dump() const { - int instSize = kMinInstSize; - - printExpTarget(target); - printf(" "); - instSize += printVectorOperand(vsrc0, inst + instSize); - printf(", "); - instSize += printVectorOperand(vsrc1, inst + instSize); - printf(", "); - instSize += printVectorOperand(vsrc2, inst + instSize); - printf(", "); - instSize += printVectorOperand(vsrc3, inst + instSize); - printf(" #en=%x, compr=%x, done=%x, vm=%x", en, compr, done, vm); -} - -void amdgpu::shader::Instruction::dump() const { - printf("%-6s ", instructionClassToString(instClass)); - - switch (instClass) { - case InstructionClass::Invalid: - break; - case InstructionClass::Vop2: - Vop2(inst).dump(); - return; - case InstructionClass::Sop2: - Sop2(inst).dump(); - return; - case InstructionClass::Sopk: - Sopk(inst).dump(); - return; - case InstructionClass::Smrd: - Smrd(inst).dump(); - return; - case InstructionClass::Vop3: - Vop3(inst).dump(); - return; - case InstructionClass::Mubuf: - Mubuf(inst).dump(); - return; - case InstructionClass::Mtbuf: - Mtbuf(inst).dump(); - return; - case InstructionClass::Mimg: - Mimg(inst).dump(); - return; - case InstructionClass::Ds: - Ds(inst).dump(); - return; - case InstructionClass::Vintrp: - Vintrp(inst).dump(); - return; - case InstructionClass::Exp: - Exp(inst).dump(); - return; - case InstructionClass::Vop1: - Vop1(inst).dump(); - return; - case InstructionClass::Vopc: - Vopc(inst).dump(); - return; - case InstructionClass::Sop1: - Sop1(inst).dump(); - return; - case InstructionClass::Sopc: - Sopc(inst).dump(); - return; - case InstructionClass::Sopp: - Sopp(inst).dump(); - return; - } - - printf(""); -} - -const char * -amdgpu::shader::instructionClassToString(InstructionClass instrClass) { - switch (instrClass) { - case InstructionClass::Invalid: - return "INVALID"; - case InstructionClass::Vop2: - return "VOP2"; - case InstructionClass::Sop2: - return "SOP2"; - case InstructionClass::Sopk: - return "SOPK"; - case InstructionClass::Smrd: - return "SMRD"; - case InstructionClass::Vop3: - return "VOP3"; - case InstructionClass::Mubuf: - return "MUBUF"; - case InstructionClass::Mtbuf: - return "MTBUF"; - case InstructionClass::Mimg: - return "MIMG"; - case InstructionClass::Ds: - return "DS"; - case InstructionClass::Vintrp: - return "VINTRP"; - case InstructionClass::Exp: - return "EXP"; - case InstructionClass::Vop1: - return "VOP1"; - case InstructionClass::Vopc: - return "VOPC"; - case InstructionClass::Sop1: - return "SOP1"; - case InstructionClass::Sopc: - return "SOPC"; - case InstructionClass::Sopp: - return "SOPP"; - } - - __builtin_trap(); -} diff --git a/hw/amdgpu/shader/src/RegisterState.cpp b/hw/amdgpu/shader/src/RegisterState.cpp deleted file mode 100644 index 9ad39f1..0000000 --- a/hw/amdgpu/shader/src/RegisterState.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "RegisterState.hpp" -#include "util/unreachable.hpp" - -amdgpu::shader::Value -amdgpu::shader::RegisterState::getRegister(RegisterId regId) { - auto offset = regId.getOffset(); - - if (regId.isScalar()) { - switch (offset) { - case 0 ... 103: - return sgprs[offset]; - case 106: - return vccLo; - case 107: - return vccHi; - case 124: - return m0; - case 126: - return execLo; - case 127: - return execHi; - case 253: - return scc; - case 254: - return ldsDirect; - } - - util::unreachable(); - } - - if (regId.isVector()) { - return vgprs[offset]; - } - - if (regId.isAttr()) { - return attrs[offset]; - } - - util::unreachable(); -} - -void amdgpu::shader::RegisterState::setRegister(RegisterId regId, Value value) { - auto offset = regId.getOffset(); - - if (regId.isScalar()) { - switch (offset) { - case 0 ... 103: - sgprs[offset] = value; - return; - case 106: - vccLo = value; - return; - case 107: - vccHi = value; - return; - case 124: - m0 = value; - return; - case 126: - execLo = value; - return; - case 127: - execHi = value; - return; - case 253: - scc = value; - return; - case 254: - ldsDirect = value; - return; - } - - util::unreachable(); - } - - if (regId.isVector()) { - vgprs[offset] = value; - return; - } - - if (regId.isAttr()) { - attrs[offset] = value; - return; - } - - util::unreachable(); -} diff --git a/hw/amdgpu/shader/src/TypeId.cpp b/hw/amdgpu/shader/src/TypeId.cpp deleted file mode 100644 index 1a4a6c6..0000000 --- a/hw/amdgpu/shader/src/TypeId.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "TypeId.hpp" -#include "util/unreachable.hpp" - -amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const { - switch (raw) { - case TypeId::Void: - case TypeId::Bool: - case TypeId::SInt8: - case TypeId::UInt8: - case TypeId::SInt16: - case TypeId::UInt16: - case TypeId::SInt32: - case TypeId::UInt32: - case TypeId::SInt64: - case TypeId::UInt64: - case TypeId::Float16: - case TypeId::Float32: - case TypeId::Float64: - case TypeId::Sampler: - case TypeId::Image2D: - case TypeId::StorageImage2D: - case TypeId::SampledImage2D: - return raw; - - case TypeId::UInt32x2: - case TypeId::UInt32x3: - case TypeId::UInt32x4: - case TypeId::ArrayUInt32x8: - case TypeId::ArrayUInt32x16: - return TypeId::UInt32; - - case TypeId::Float32x2: - case TypeId::Float32x3: - case TypeId::Float32x4: - case TypeId::ArrayFloat32x8: - case TypeId::ArrayFloat32x16: - return TypeId::Float32; - } - - util::unreachable(); -} - -std::size_t amdgpu::shader::TypeId::getSize() const { - switch (raw) { - case TypeId::Void: - case TypeId::Sampler: - case TypeId::StorageImage2D: - case TypeId::Image2D: - case TypeId::SampledImage2D: - return 0; - case TypeId::Bool: - return 1; - case TypeId::SInt8: - case TypeId::UInt8: - return 1; - case TypeId::SInt16: - case TypeId::UInt16: - return 2; - case TypeId::SInt32: - case TypeId::UInt32: - return 4; - case TypeId::SInt64: - case TypeId::UInt64: - return 8; - case TypeId::Float16: - return 2; - case TypeId::Float32: - return 4; - case TypeId::Float64: - return 8; - - case TypeId::UInt32x2: - case TypeId::UInt32x3: - case TypeId::UInt32x4: - case TypeId::ArrayUInt32x8: - case TypeId::ArrayUInt32x16: - case TypeId::Float32x2: - case TypeId::Float32x3: - case TypeId::Float32x4: - case TypeId::ArrayFloat32x8: - case TypeId::ArrayFloat32x16: - return getElementsCount() * getBaseType().getSize(); - } - - util::unreachable(); -} - -std::size_t amdgpu::shader::TypeId::getElementsCount() const { - switch (raw) { - case TypeId::Bool: - case TypeId::SInt8: - case TypeId::UInt8: - case TypeId::SInt16: - case TypeId::UInt16: - case TypeId::SInt32: - case TypeId::UInt32: - case TypeId::SInt64: - case TypeId::UInt64: - case TypeId::Float16: - case TypeId::Float32: - case TypeId::Float64: - return 1; - - case TypeId::UInt32x2: - return 2; - case TypeId::UInt32x3: - return 3; - case TypeId::UInt32x4: - return 4; - case TypeId::ArrayUInt32x8: - return 8; - case TypeId::ArrayUInt32x16: - return 16; - case TypeId::Float32x2: - return 2; - case TypeId::Float32x3: - return 3; - case TypeId::Float32x4: - return 4; - case TypeId::ArrayFloat32x8: - return 8; - case TypeId::ArrayFloat32x16: - return 16; - - case TypeId::Void: - case TypeId::Sampler: - case TypeId::Image2D: - case TypeId::StorageImage2D: - case TypeId::SampledImage2D: - return 0; - } - - util::unreachable(); -} diff --git a/hw/amdgpu/shader/src/cf.cpp b/hw/amdgpu/shader/src/cf.cpp deleted file mode 100644 index 2f18a07..0000000 --- a/hw/amdgpu/shader/src/cf.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include "cf.hpp" -#include -#include -#include - -void cf::BasicBlock::split(BasicBlock *target) { - assert(target->address > address); - target->size = size - (target->address - address); - size = target->address - address; - - for (std::size_t i = 0, count = getSuccessorsCount(); i < count; ++i) { - auto succ = getSuccessor(i); - succ->predecessors.erase(this); - succ->predecessors.insert(target); - target->successors[i] = successors[i]; - successors[i] = nullptr; - } - - target->terminator = terminator; - terminator = TerminatorKind::None; - - createBranch(target); -} - -void cf::BasicBlock::createConditionalBranch(BasicBlock *ifTrue, - BasicBlock *ifFalse) { - assert(terminator == TerminatorKind::None); - assert(getSuccessorsCount() == 0); - ifTrue->predecessors.insert(this); - ifFalse->predecessors.insert(this); - - successors[0] = ifTrue; - successors[1] = ifFalse; - - terminator = TerminatorKind::Branch; -} - -void cf::BasicBlock::createBranch(BasicBlock *target) { - assert(terminator == TerminatorKind::None); - assert(getSuccessorsCount() == 0); - - target->predecessors.insert(this); - successors[0] = target; - - terminator = TerminatorKind::Branch; -} - -void cf::BasicBlock::createBranchToUnknown() { - assert(terminator == TerminatorKind::None); - assert(getSuccessorsCount() == 0); - - terminator = TerminatorKind::BranchToUnknown; -} - -void cf::BasicBlock::createReturn() { - assert(terminator == TerminatorKind::None); - assert(getSuccessorsCount() == 0); - - terminator = TerminatorKind::Return; -} - -void cf::BasicBlock::replaceSuccessor(BasicBlock *origBB, BasicBlock *newBB) { - origBB->predecessors.erase(this); - newBB->predecessors.insert(this); - - if (origBB == successors[0]) { - successors[0] = newBB; - return; - } - - if (origBB == successors[1]) { - successors[1] = newBB; - return; - } - - std::abort(); -} - -bool cf::BasicBlock::hasDirectPredecessor(const BasicBlock &block) const { - for (auto pred : predecessors) { - if (pred == &block) { - return true; - } - } - - return false; -} - -bool cf::BasicBlock::hasPredecessor(const BasicBlock &block) const { - if (&block == this) { - return hasDirectPredecessor(block); - } - - std::vector workList; - std::unordered_set visited; - workList.push_back(this); - visited.insert(this); - - while (!workList.empty()) { - auto node = workList.back(); - - if (node == &block) { - return true; - } - - workList.pop_back(); - workList.reserve(workList.size() + predecessors.size()); - - for (auto pred : predecessors) { - if (visited.insert(pred).second) { - workList.push_back(pred); - } - } - } - - return false; -} diff --git a/hw/amdgpu/shader/src/scf.cpp b/hw/amdgpu/shader/src/scf.cpp deleted file mode 100644 index 80bfd53..0000000 --- a/hw/amdgpu/shader/src/scf.cpp +++ /dev/null @@ -1,249 +0,0 @@ -#include "scf.hpp" -#include "cf.hpp" -#include - -void scf::Block::eraseFrom(Node *endBefore) { - mEnd = endBefore->getPrev(); - if (mEnd != nullptr) { - mEnd->mNext = nullptr; - } else { - mBegin = nullptr; - } -} - -void scf::Block::splitInto(Block *target, Node *splitPoint) { - auto targetEnd = std::exchange(mEnd, splitPoint->mPrev); - - if (mEnd != nullptr) { - mEnd->mNext = nullptr; - } else { - mBegin = nullptr; - } - - for (auto node = splitPoint; node != nullptr; node = node->getNext()) { - node->mParent = target; - } - - if (target->mEnd != nullptr) { - target->mEnd->mNext = splitPoint; - } - - splitPoint->mPrev = target->mEnd; - target->mEnd = targetEnd; - - if (target->mBegin == nullptr) { - target->mBegin = splitPoint; - } -} - -scf::Block *scf::Block::split(Context &context, Node *splitPoint) { - auto result = context.create(); - splitInto(result, splitPoint); - return result; -} - -static scf::BasicBlock *findJumpTargetIn(scf::Block *parentBlock, - scf::Block *testBlock) { - auto jumpNode = dynCast(testBlock->getLastNode()); - - if (jumpNode == nullptr || jumpNode->target->getParent() != parentBlock) { - return nullptr; - } - - return jumpNode->target; -} - -static bool transformJumpToLoop(scf::Context &ctxt, scf::Block *block) { - // bb0 - // bb1 - // if true { - // bb2 - // jump bb1 - // } else { - // bb3 - // } - // - // --> - // - // bb0 - // loop { - // bb1 - // if false { - // break - // } - // bb2 - // } - // bb3 - - if (block->isEmpty()) { - return false; - } - - auto ifElse = dynCast(block->getLastNode()); - - if (ifElse == nullptr) { - return false; - } - - auto loopTarget = findJumpTargetIn(block, ifElse->ifTrue); - auto loopBlock = ifElse->ifTrue; - auto invariantBlock = ifElse->ifFalse; - - if (loopTarget == nullptr) { - loopTarget = findJumpTargetIn(block, ifElse->ifFalse); - loopBlock = ifElse->ifFalse; - invariantBlock = ifElse->ifTrue; - - if (loopTarget == nullptr) { - return false; - } - } - - auto loopBody = block->split(ctxt, loopTarget); - auto loop = ctxt.create(loopBody); - block->append(loop); - - for (auto node = invariantBlock->getRootNode(); node != nullptr;) { - auto nextNode = node->getNext(); - invariantBlock->detachNode(node); - block->append(node); - node = nextNode; - } - - loopBlock->detachNode(loopBlock->getLastNode()); - - for (auto node = loopBlock->getRootNode(); node != nullptr;) { - auto nextNode = node->getNext(); - loopBlock->detachNode(node); - loopBody->append(node); - node = nextNode; - } - - invariantBlock->append(ctxt.create()); - - return true; -} - -static bool moveSameLastBlocksTo(scf::IfElse *ifElse, scf::Block *block) { - if (ifElse->ifTrue->isEmpty() || ifElse->ifFalse->isEmpty()) { - return false; - } - - auto ifTrueIt = ifElse->ifTrue->getLastNode(); - auto ifFalseIt = ifElse->ifFalse->getLastNode(); - - while (ifTrueIt != nullptr && ifFalseIt != nullptr) { - if (!ifTrueIt->isEqual(*ifFalseIt)) { - break; - } - - ifTrueIt = ifTrueIt->getPrev(); - ifFalseIt = ifFalseIt->getPrev(); - } - - if (ifTrueIt == ifElse->ifTrue->getLastNode()) { - return false; - } - - if (ifTrueIt == nullptr) { - ifTrueIt = ifElse->ifTrue->getRootNode(); - } else { - ifTrueIt = ifTrueIt->getNext(); - } - - if (ifFalseIt == nullptr) { - ifFalseIt = ifElse->ifFalse->getRootNode(); - } else { - ifFalseIt = ifFalseIt->getNext(); - } - - ifElse->ifTrue->splitInto(block, ifTrueIt); - ifElse->ifFalse->eraseFrom(ifFalseIt); - return true; -} - -class Structurizer { - scf::Context &context; - -public: - Structurizer(scf::Context &context) : context(context) {} - - scf::Block *structurize(cf::BasicBlock *bb) { - return structurizeBlock(bb, {}); - } - -public: - scf::IfElse *structurizeIfElse( - cf::BasicBlock *ifTrue, cf::BasicBlock *ifFalse, - std::unordered_map &visited) { - auto ifTrueBlock = structurizeBlock(ifTrue, visited); - auto ifFalseBlock = structurizeBlock(ifFalse, visited); - - return context.create(ifTrueBlock, ifFalseBlock); - } - - scf::Block *structurizeBlock( - cf::BasicBlock *bb, - std::unordered_map visited) { - auto result = context.create(); - std::vector workList; - workList.push_back(bb); - - while (!workList.empty()) { - auto block = workList.back(); - workList.pop_back(); - - auto [it, inserted] = visited.try_emplace(block, nullptr); - if (!inserted) { - result->append(context.create(it->second)); - continue; - } - - auto scfBlock = context.create(block->getAddress(), - block->getSize()); - it->second = scfBlock; - result->append(scfBlock); - - switch (block->getTerminator()) { - case cf::TerminatorKind::None: - std::abort(); - break; - - case cf::TerminatorKind::Branch: - switch (block->getSuccessorsCount()) { - case 1: - workList.push_back(block->getSuccessor(0)); - break; - - case 2: { - auto ifElse = structurizeIfElse(block->getSuccessor(0), - block->getSuccessor(1), visited); - result->append(ifElse); - - while (moveSameLastBlocksTo(ifElse, result) || - transformJumpToLoop(context, result)) { - ; - } - - break; - } - } - break; - - case cf::TerminatorKind::BranchToUnknown: - result->append(context.create()); - break; - - case cf::TerminatorKind::Return: - result->append(context.create()); - break; - } - } - - return result; - } -}; - -scf::Block *scf::structurize(Context &ctxt, cf::BasicBlock *bb) { - return Structurizer{ctxt}.structurize(bb); -} diff --git a/rpcsx-gpu-legacy/CMakeLists.txt b/rpcsx-gpu-legacy/CMakeLists.txt deleted file mode 100644 index 8abce0f..0000000 --- a/rpcsx-gpu-legacy/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -find_package(Vulkan 1.3 REQUIRED) -find_package(glfw3 3.3 REQUIRED) - -add_executable(rpcsx-gpu-legacy - main.cpp -) - -target_include_directories(rpcsx-gpu-legacy PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(rpcsx-gpu-legacy PUBLIC amdgpu::bridge amdgpu::device glfw Vulkan::Vulkan rx) -set_target_properties(rpcsx-gpu-legacy PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -target_base_address(rpcsx-gpu-legacy 0x0000060000000000) -install(TARGETS rpcsx-gpu-legacy RUNTIME DESTINATION bin) diff --git a/rpcsx-gpu-legacy/main.cpp b/rpcsx-gpu-legacy/main.cpp deleted file mode 100644 index 87e129d..0000000 --- a/rpcsx-gpu-legacy/main.cpp +++ /dev/null @@ -1,1206 +0,0 @@ -#include "amdgpu/RemoteMemory.hpp" -#include "amdgpu/device/gpu-scheduler.hpp" -#include "amdgpu/device/vk.hpp" -#include "rx/MemoryTable.hpp" -#include "rx/Version.hpp" -#include "rx/mem.hpp" -#include "util/unreachable.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // TODO: make in optional - -static void usage(std::FILE *out, const char *argv0) { - std::fprintf(out, "usage: %s [options...]\n", argv0); - std::fprintf(out, " options:\n"); - std::fprintf(out, " --version, -v - print version\n"); - std::fprintf(out, - " --cmd-bridge - setup command queue bridge name\n"); - std::fprintf(out, " --shm - setup shared memory name\n"); - std::fprintf( - out, - " --gpu - specify physical gpu index to use, default is 0\n"); - std::fprintf(out, - " --presenter - set flip engine target\n"); - std::fprintf(out, " --validate - enable validation layers\n"); - std::fprintf(out, " -h, --help - show this message\n"); - std::fprintf(out, "\n"); - std::fprintf(out, " presenter mode:\n"); - std::fprintf(out, " window - create and use native window (default)\n"); -} - -enum class PresenterMode { Window }; - -static VKAPI_ATTR VkBool32 VKAPI_CALL -debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, - VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, - void *pUserData) { - - std::fprintf(stderr, "validation layer: %s\n", pCallbackData->pMessage); - - if (messageSeverity >= VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { - std::abort(); - } - return VK_FALSE; -} - -static VkResult _vkCreateDebugUtilsMessengerEXT( - VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDebugUtilsMessengerEXT *pDebugMessenger) { - static auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( - instance, "vkCreateDebugUtilsMessengerEXT"); - if (func != nullptr) { - return func(instance, pCreateInfo, pAllocator, pDebugMessenger); - } else { - return VK_ERROR_EXTENSION_NOT_PRESENT; - } -} - -int main(int argc, const char *argv[]) { - if (argc == 2) { - if (argv[1] == std::string_view("-h") || - argv[1] == std::string_view("--help")) { - usage(stdout, argv[0]); - return 0; - } - - if (argv[1] == std::string_view("-v") || - argv[1] == std::string_view("--version")) { - std::printf("v%s\n", rx::getVersion().toString().c_str()); - return 0; - } - } - - const char *cmdBridgeName = "/rpcsx-gpu-cmds"; - const char *shmName = "/rpcsx-os-memory"; - unsigned long gpuIndex = 0; - auto presenter = PresenterMode::Window; - bool enableValidation = false; - - for (int i = 1; i < argc; ++i) { - if (argv[i] == std::string_view("--cmd-bridge")) { - if (argc <= i + 1) { - usage(stderr, argv[0]); - return 1; - } - - cmdBridgeName = argv[++i]; - continue; - } - - if (argv[i] == std::string_view("--shm")) { - if (argc <= i + 1) { - usage(stderr, argv[0]); - return 1; - } - shmName = argv[++i]; - continue; - } - - if (argv[i] == std::string_view("--presenter")) { - if (argc <= i + 1) { - usage(stderr, argv[0]); - return 1; - } - - auto presenterText = std::string_view(argv[++i]); - - if (presenterText == "window") { - presenter = PresenterMode::Window; - } else { - usage(stderr, argv[0]); - return 1; - } - continue; - } - - if (argv[i] == std::string_view("--gpu")) { - if (argc <= i + 1) { - usage(stderr, argv[0]); - return 1; - } - - char *endPtr = nullptr; - gpuIndex = std::strtoul(argv[++i], &endPtr, 10); - if (endPtr == nullptr || *endPtr != '\0') { - usage(stderr, argv[0]); - return 1; - } - - continue; - } - - if (argv[i] == std::string_view("--validate")) { - enableValidation = true; - continue; - } - - usage(stderr, argv[0]); - return 1; - } - - if (!rx::mem::reserve((void *)0x40000, 0x60000000000 - 0x40000)) { - std::fprintf(stderr, "failed to reserve virtual memory\n"); - return 1; - } - - glfwInit(); - glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); - auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr); - - const char **glfwExtensions; - uint32_t glfwExtensionCount = 0; - - glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); - - auto requiredInstanceExtensions = std::vector( - glfwExtensions, glfwExtensions + glfwExtensionCount); - - if (enableValidation) { - requiredInstanceExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - } - - uint32_t extCount = 0; - vkEnumerateInstanceExtensionProperties(nullptr, &extCount, nullptr); - std::vector supportedInstanceExtensions; - - if (extCount > 0) { - std::vector extensions(extCount); - if (vkEnumerateInstanceExtensionProperties( - nullptr, &extCount, &extensions.front()) == VK_SUCCESS) { - supportedInstanceExtensions.reserve(extensions.size()); - for (VkExtensionProperties extension : extensions) { - supportedInstanceExtensions.push_back(extension.extensionName); - } - } - } - - for (const char *extension : requiredInstanceExtensions) { - if (std::find(supportedInstanceExtensions.begin(), - supportedInstanceExtensions.end(), - extension) == supportedInstanceExtensions.end()) { - util::unreachable("Requested instance extension '%s' is not present at " - "instance level", - extension); - } - } - - const char *validationLayerName = "VK_LAYER_KHRONOS_validation"; - - VkApplicationInfo appInfo = { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pApplicationName = "RPCSX", - .pEngineName = "none", - .apiVersion = VK_API_VERSION_1_3, - }; - - VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo{}; - debugCreateInfo.sType = - VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - debugCreateInfo.messageSeverity = - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; - debugCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - 0 - // VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT - ; - debugCreateInfo.pfnUserCallback = debugCallback; - - VkInstanceCreateInfo instanceCreateInfo = {}; - instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - if (enableValidation) { - instanceCreateInfo.pNext = &debugCreateInfo; - } - instanceCreateInfo.pApplicationInfo = &appInfo; - instanceCreateInfo.enabledExtensionCount = requiredInstanceExtensions.size(); - instanceCreateInfo.ppEnabledExtensionNames = - requiredInstanceExtensions.data(); - - std::vector enabledLayers; - // enabledLayers.push_back("VK_LAYER_KHRONOS_shader_object"); - - if (enableValidation) { - enabledLayers.push_back(validationLayerName); - } - - instanceCreateInfo.ppEnabledLayerNames = enabledLayers.data(); - instanceCreateInfo.enabledLayerCount = enabledLayers.size(); - - VkInstance vkInstance; - Verify() << vkCreateInstance(&instanceCreateInfo, nullptr, &vkInstance); - auto getVkPhyDevice = [&](unsigned index) { - std::vector devices(index + 1); - uint32_t count = devices.size(); - Verify() << vkEnumeratePhysicalDevices(vkInstance, &count, devices.data()); - Verify() << (index < count); - return devices[index]; - }; - - if (enableValidation) { - VkDebugUtilsMessengerEXT debugMessenger; - _vkCreateDebugUtilsMessengerEXT(vkInstance, &debugCreateInfo, nullptr, - &debugMessenger); - } - - auto vkPhysicalDevice = getVkPhyDevice(gpuIndex); - - VkPhysicalDeviceProperties vkPhyDeviceProperties; - vkGetPhysicalDeviceProperties(vkPhysicalDevice, &vkPhyDeviceProperties); - std::printf("VK: Selected physical device is %s\n", - vkPhyDeviceProperties.deviceName); - VkPhysicalDeviceMemoryProperties vkPhyDeviceMemoryProperties; - vkGetPhysicalDeviceMemoryProperties(vkPhysicalDevice, - &vkPhyDeviceMemoryProperties); - - VkPhysicalDevice8BitStorageFeatures storage_8bit = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES}; - VkPhysicalDevice16BitStorageFeatures storage_16bit = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, - .pNext = &storage_8bit}; - VkPhysicalDeviceShaderFloat16Int8Features float16_int8 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, - .pNext = &storage_16bit}; - - VkPhysicalDeviceFeatures2 features2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - .pNext = &float16_int8}; - vkGetPhysicalDeviceFeatures2(vkPhysicalDevice, &features2); - - Verify() << storage_8bit.uniformAndStorageBuffer8BitAccess; - Verify() << storage_16bit.uniformAndStorageBuffer16BitAccess; - Verify() << float16_int8.shaderFloat16; - Verify() << float16_int8.shaderInt8; - - std::vector vkSupportedDeviceExtensions; - { - uint32_t extCount = 0; - vkEnumerateDeviceExtensionProperties(vkPhysicalDevice, nullptr, &extCount, - nullptr); - if (extCount > 0) { - std::vector extensions(extCount); - if (vkEnumerateDeviceExtensionProperties(vkPhysicalDevice, nullptr, - &extCount, extensions.data()) == - VK_SUCCESS) { - - vkSupportedDeviceExtensions.reserve(extCount); - - for (auto ext : extensions) { - vkSupportedDeviceExtensions.push_back(ext.extensionName); - } - } - } - } - - auto isDeviceExtensionSupported = [&](std::string_view extension) { - return std::find(vkSupportedDeviceExtensions.begin(), - vkSupportedDeviceExtensions.end(), - extension) != vkSupportedDeviceExtensions.end(); - }; - - std::vector requestedDeviceExtensions = { - VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, - VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, - VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME, - // VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, - // VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, - VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME, - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_EXT_SHADER_OBJECT_EXTENSION_NAME, - }; - - if (isDeviceExtensionSupported(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) { - requestedDeviceExtensions.push_back(VK_EXT_DEBUG_MARKER_EXTENSION_NAME); - } - - // for (auto extension : vkSupportedDeviceExtensions) { - // std::printf("supported device extension %s\n", extension.c_str()); - // } - - for (const char *requestedExtension : requestedDeviceExtensions) { - if (!isDeviceExtensionSupported(requestedExtension)) { - std::fprintf( - stderr, - "Requested device extension '%s' is not present at device level\n", - requestedExtension); - std::abort(); - } - } - - std::vector queueFamilyProperties; - - { - uint32_t queueFamilyCount; - vkGetPhysicalDeviceQueueFamilyProperties(vkPhysicalDevice, - &queueFamilyCount, nullptr); - Verify() << (queueFamilyCount > 0); - queueFamilyProperties.resize(queueFamilyCount); - for (auto &property : queueFamilyProperties) { - property.sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2; - } - - vkGetPhysicalDeviceQueueFamilyProperties2( - vkPhysicalDevice, &queueFamilyCount, queueFamilyProperties.data()); - } - - VkSurfaceKHR vkSurface; - Verify() << glfwCreateWindowSurface(vkInstance, window, nullptr, &vkSurface); - - std::set queueFamiliesWithPresentSupport; - std::set queueFamiliesWithTransferSupport; - std::set queueFamiliesWithComputeSupport; - std::set queueFamiliesWithGraphicsSupport; - - uint32_t queueFamiliesCount = 0; - for (auto &familyProperty : queueFamilyProperties) { - VkBool32 supportsPresent; - if (vkGetPhysicalDeviceSurfaceSupportKHR(vkPhysicalDevice, - queueFamiliesCount, vkSurface, - &supportsPresent) == VK_SUCCESS && - supportsPresent != 0) { - queueFamiliesWithPresentSupport.insert(queueFamiliesCount); - } - - if (familyProperty.queueFamilyProperties.queueFlags & - VK_QUEUE_SPARSE_BINDING_BIT) { - if (familyProperty.queueFamilyProperties.queueFlags & - VK_QUEUE_GRAPHICS_BIT) { - queueFamiliesWithGraphicsSupport.insert(queueFamiliesCount); - } - - if (familyProperty.queueFamilyProperties.queueFlags & - VK_QUEUE_COMPUTE_BIT) { - queueFamiliesWithComputeSupport.insert(queueFamiliesCount); - } - } - - if (familyProperty.queueFamilyProperties.queueFlags & - VK_QUEUE_TRANSFER_BIT) { - queueFamiliesWithTransferSupport.insert(queueFamiliesCount); - } - - queueFamiliesCount++; - } - - Verify() << !queueFamiliesWithPresentSupport.empty(); - Verify() << !queueFamiliesWithTransferSupport.empty(); - Verify() << !queueFamiliesWithComputeSupport.empty(); - Verify() << !queueFamiliesWithGraphicsSupport.empty(); - - std::vector requestedQueues; - - std::vector defaultQueuePriorities; - defaultQueuePriorities.resize(32); - - for (uint32_t queueFamily = 0; queueFamily < queueFamiliesCount; - ++queueFamily) { - if (queueFamiliesWithGraphicsSupport.contains(queueFamily)) { - requestedQueues.push_back( - {.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = queueFamily, - .queueCount = - std::min(queueFamilyProperties[queueFamily] - .queueFamilyProperties.queueCount, - defaultQueuePriorities.size()), - .pQueuePriorities = defaultQueuePriorities.data()}); - } else if (queueFamiliesWithComputeSupport.contains(queueFamily) || - queueFamiliesWithTransferSupport.contains(queueFamily)) { - requestedQueues.push_back( - {.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = queueFamily, - .queueCount = - std::min(queueFamilyProperties[queueFamily] - .queueFamilyProperties.queueCount, - defaultQueuePriorities.size()), - .pQueuePriorities = defaultQueuePriorities.data()}); - } - } - - VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT, - .shaderObject = VK_TRUE}; - - VkPhysicalDeviceVulkan13Features phyDevFeatures13{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, - .pNext = &shaderObjectFeatures, - .synchronization2 = VK_TRUE, - .dynamicRendering = VK_TRUE, - .maintenance4 = VK_TRUE, - }; - - VkPhysicalDeviceVulkan12Features phyDevFeatures12{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = &phyDevFeatures13, - .storageBuffer8BitAccess = VK_TRUE, - .uniformAndStorageBuffer8BitAccess = VK_TRUE, - .shaderFloat16 = VK_TRUE, - .shaderInt8 = VK_TRUE, - .timelineSemaphore = VK_TRUE, - }; - - VkPhysicalDeviceVulkan11Features phyDevFeatures11{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, - .pNext = &phyDevFeatures12, - .storageBuffer16BitAccess = VK_TRUE, - .uniformAndStorageBuffer16BitAccess = VK_TRUE, - }; - - VkDeviceCreateInfo deviceCreateInfo{ - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = &phyDevFeatures11, - .queueCreateInfoCount = static_cast(requestedQueues.size()), - .pQueueCreateInfos = requestedQueues.data(), - .enabledExtensionCount = - static_cast(requestedDeviceExtensions.size()), - .ppEnabledExtensionNames = requestedDeviceExtensions.data(), - .pEnabledFeatures = &features2.features}; - - VkDevice vkDevice; - Verify() << vkCreateDevice(vkPhysicalDevice, &deviceCreateInfo, nullptr, - &vkDevice); - VkSwapchainKHR swapchain = VK_NULL_HANDLE; - VkExtent2D swapchainExtent{}; - - std::vector swapchainImages; - - VkFormat swapchainColorFormat = VK_FORMAT_B8G8R8A8_UNORM; - VkColorSpaceKHR swapchainColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - - uint32_t formatCount; - Verify() << vkGetPhysicalDeviceSurfaceFormatsKHR(vkPhysicalDevice, vkSurface, - &formatCount, nullptr); - Verify() << (formatCount > 0); - - std::vector surfaceFormats(formatCount); - Verify() << vkGetPhysicalDeviceSurfaceFormatsKHR( - vkPhysicalDevice, vkSurface, &formatCount, surfaceFormats.data()); - - if ((formatCount == 1) && (surfaceFormats[0].format == VK_FORMAT_UNDEFINED)) { - swapchainColorFormat = VK_FORMAT_B8G8R8A8_UNORM; - swapchainColorSpace = surfaceFormats[0].colorSpace; - } else { - bool found_B8G8R8A8_UNORM = false; - for (auto &&surfaceFormat : surfaceFormats) { - if (surfaceFormat.format == VK_FORMAT_B8G8R8A8_UNORM) { - swapchainColorFormat = surfaceFormat.format; - swapchainColorSpace = surfaceFormat.colorSpace; - found_B8G8R8A8_UNORM = true; - break; - } - } - - if (!found_B8G8R8A8_UNORM) { - swapchainColorFormat = surfaceFormats[0].format; - swapchainColorSpace = surfaceFormats[0].colorSpace; - } - } - - auto createSwapchain = [&] { - VkSwapchainKHR oldSwapchain = swapchain; - - VkSurfaceCapabilitiesKHR surfCaps; - Verify() << vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vkPhysicalDevice, - vkSurface, &surfCaps); - uint32_t presentModeCount; - Verify() << vkGetPhysicalDeviceSurfacePresentModesKHR( - vkPhysicalDevice, vkSurface, &presentModeCount, NULL); - Verify() << (presentModeCount > 0); - - std::vector presentModes(presentModeCount); - Verify() << vkGetPhysicalDeviceSurfacePresentModesKHR( - vkPhysicalDevice, vkSurface, &presentModeCount, presentModes.data()); - - if (surfCaps.currentExtent.width != (uint32_t)-1) { - swapchainExtent = surfCaps.currentExtent; - } - - VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR; - for (std::size_t i = 0; i < presentModeCount; i++) { - if (presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR) { - swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; - continue; - } - - if (presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR) { - swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR; - break; - } - } - - uint32_t desiredNumberOfSwapchainImages = surfCaps.minImageCount; - if ((surfCaps.maxImageCount > 0) && - (desiredNumberOfSwapchainImages > surfCaps.maxImageCount)) { - desiredNumberOfSwapchainImages = surfCaps.maxImageCount; - } - - VkSurfaceTransformFlagsKHR preTransform; - if (surfCaps.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR) { - preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - } else { - preTransform = surfCaps.currentTransform; - } - - VkCompositeAlphaFlagBitsKHR compositeAlpha = - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - std::vector compositeAlphaFlags = { - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, - }; - - for (auto &compositeAlphaFlag : compositeAlphaFlags) { - if (surfCaps.supportedCompositeAlpha & compositeAlphaFlag) { - compositeAlpha = compositeAlphaFlag; - break; - } - } - - VkSwapchainCreateInfoKHR swapchainCI = {}; - swapchainCI.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchainCI.surface = vkSurface; - swapchainCI.minImageCount = desiredNumberOfSwapchainImages; - swapchainCI.imageFormat = swapchainColorFormat; - swapchainCI.imageColorSpace = swapchainColorSpace; - swapchainCI.imageExtent = {swapchainExtent.width, swapchainExtent.height}; - swapchainCI.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchainCI.preTransform = (VkSurfaceTransformFlagBitsKHR)preTransform; - swapchainCI.imageArrayLayers = 1; - swapchainCI.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchainCI.queueFamilyIndexCount = 0; - swapchainCI.presentMode = swapchainPresentMode; - swapchainCI.oldSwapchain = oldSwapchain; - swapchainCI.clipped = VK_TRUE; - swapchainCI.compositeAlpha = compositeAlpha; - - if (surfCaps.supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - swapchainCI.imageUsage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - } - - if (surfCaps.supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - swapchainCI.imageUsage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - } - - Verify() << vkCreateSwapchainKHR(vkDevice, &swapchainCI, nullptr, - &swapchain); - - if (oldSwapchain != VK_NULL_HANDLE) { - vkDestroySwapchainKHR(vkDevice, oldSwapchain, nullptr); - } - - uint32_t swapchainImageCount = 0; - Verify() << vkGetSwapchainImagesKHR(vkDevice, swapchain, - &swapchainImageCount, nullptr); - - swapchainImages.resize(swapchainImageCount); - Verify() << vkGetSwapchainImagesKHR( - vkDevice, swapchain, &swapchainImageCount, swapchainImages.data()); - }; - - createSwapchain(); - - std::vector> computeQueues; - std::vector> graphicsQueues; - VkQueue presentQueue = VK_NULL_HANDLE; - unsigned presentQueueFamily; - - for (auto &queueInfo : requestedQueues) { - if (queueFamiliesWithGraphicsSupport.contains(queueInfo.queueFamilyIndex)) { - for (uint32_t queueIndex = 0; queueIndex < queueInfo.queueCount; - ++queueIndex) { - - if (presentQueue == VK_NULL_HANDLE && - queueFamiliesWithPresentSupport.contains( - queueInfo.queueFamilyIndex)) { - presentQueueFamily = queueInfo.queueFamilyIndex; - vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, 0, - &presentQueue); - - continue; - } - - auto &[queue, index] = graphicsQueues.emplace_back(); - index = queueInfo.queueFamilyIndex; - vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex, - &queue); - } - - continue; - } - - if (queueFamiliesWithComputeSupport.contains(queueInfo.queueFamilyIndex)) { - if (!queueFamiliesWithTransferSupport.contains( - queueInfo.queueFamilyIndex)) { - util::unreachable(); - } - - uint32_t queueIndex = 0; - for (; queueIndex < queueInfo.queueCount; ++queueIndex) { - auto &[queue, index] = computeQueues.emplace_back(); - index = queueInfo.queueFamilyIndex; - vkGetDeviceQueue(vkDevice, queueInfo.queueFamilyIndex, queueIndex, - &queue); - } - - continue; - } - } - - if (graphicsQueues.empty() && presentQueue != VK_NULL_HANDLE) { - graphicsQueues.push_back({presentQueue, presentQueueFamily}); - } - - Verify() << (computeQueues.size() > 1); - Verify() << (graphicsQueues.size() > 0); - Verify() << (presentQueue != VK_NULL_HANDLE); - - amdgpu::device::vk::g_computeQueues = computeQueues; - amdgpu::device::vk::g_graphicsQueues = graphicsQueues; - - VkCommandPoolCreateInfo commandPoolCreateInfo = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = presentQueueFamily, - }; - - VkCommandPool commandPool; - Verify() << vkCreateCommandPool(vkDevice, &commandPoolCreateInfo, nullptr, - &commandPool); - std::vector inFlightFences(swapchainImages.size()); - - for (auto &fence : inFlightFences) { - VkFenceCreateInfo fenceInfo{}; - fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - - Verify() << vkCreateFence(vkDevice, &fenceInfo, nullptr, &fence); - } - - VkSemaphore presentCompleteSemaphore; - VkSemaphore renderCompleteSemaphore; - { - VkSemaphoreCreateInfo semaphoreCreateInfo{}; - semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - - Verify() << vkCreateSemaphore(vkDevice, &semaphoreCreateInfo, nullptr, - &presentCompleteSemaphore); - Verify() << vkCreateSemaphore(vkDevice, &semaphoreCreateInfo, nullptr, - &renderCompleteSemaphore); - } - - amdgpu::device::setVkDevice(vkDevice, vkPhyDeviceMemoryProperties, - vkPhyDeviceProperties); - - auto bridge = amdgpu::bridge::openShmCommandBuffer(cmdBridgeName); - if (bridge == nullptr) { - bridge = amdgpu::bridge::createShmCommandBuffer(cmdBridgeName); - } - - if (bridge->pullerPid > 0 && ::kill(bridge->pullerPid, 0) == 0) { - // another instance of rpcsx-gpu on the same bridge, kill self after that - - std::fprintf(stderr, "Another instance already exists\n"); - return 1; - } - - bridge->pullerPid = ::getpid(); - - amdgpu::bridge::BridgePuller bridgePuller{bridge}; - amdgpu::bridge::Command commandsBuffer[1]; - - int dmemFd[3]; - - for (std::size_t i = 0; i < std::size(dmemFd); ++i) { - auto path = "/dev/shm/rpcsx-dmem-" + std::to_string(i); - if (!std::filesystem::exists(path)) { - std::printf("Waiting for dmem %zu\n", i); - while (!std::filesystem::exists(path)) { - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - } - } - - dmemFd[i] = ::shm_open(("/rpcsx-dmem-" + std::to_string(i)).c_str(), O_RDWR, - S_IRUSR | S_IWUSR); - - if (dmemFd[i] < 0) { - std::printf("failed to open dmem shared memory %zu\n", i); - return 1; - } - } - - { - amdgpu::device::AmdgpuDevice device(bridgePuller.header); - - struct VmMapSlot { - int memoryType; - int prot; - std::int64_t offset; - std::uint64_t baseAddress; - - auto operator<=>(const VmMapSlot &) const = default; - }; - - struct ProcessInfo { - int vmId = -1; - int vmFd = -1; - amdgpu::bridge::CmdBufferAttribute bufferAttributes[10]; - amdgpu::bridge::CmdBuffer buffers[10]; - rx::MemoryTableWithPayload vmTable; - }; - - auto mapProcess = [&](std::int64_t pid, int vmId, ProcessInfo &process) { - process.vmId = vmId; - - auto memory = amdgpu::RemoteMemory{vmId}; - - std::string pidVmName = shmName; - pidVmName += '-'; - pidVmName += std::to_string(pid); - int memoryFd = ::shm_open(pidVmName.c_str(), O_RDWR, S_IRUSR | S_IWUSR); - process.vmFd = memoryFd; - - if (memoryFd < 0) { - std::printf("failed to process %x shared memory\n", (int)pid); - std::abort(); - } - - for (auto [startAddress, endAddress, slot] : process.vmTable) { - auto gpuProt = slot.prot >> 4; - if (gpuProt == 0) { - continue; - } - - auto devOffset = slot.offset + startAddress - slot.baseAddress; - int mapFd = memoryFd; - - if (slot.memoryType >= 0) { - mapFd = dmemFd[slot.memoryType]; - } - - auto mmapResult = - ::mmap(memory.getPointer(startAddress), endAddress - startAddress, - gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset); - - if (mmapResult == MAP_FAILED) { - std::printf( - "failed to map process %x memory, address %lx-%lx, type %x\n", - (int)pid, startAddress, endAddress, slot.memoryType); - std::abort(); - } - - device.handleProtectMemory(memory, startAddress, - endAddress - startAddress, slot.prot); - } - }; - - auto unmapProcess = [&](ProcessInfo &process) { - auto startAddress = static_cast(process.vmId) << 40; - auto size = static_cast(1) << 40; - rx::mem::reserve(reinterpret_cast(startAddress), size); - - ::close(process.vmFd); - process.vmFd = -1; - process.vmId = -1; - }; - - std::unordered_map processInfo; - - std::vector presentCmdBuffers(swapchainImages.size()); - - { - VkCommandBufferAllocateInfo allocInfo{}; - allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - allocInfo.commandPool = commandPool; - allocInfo.commandBufferCount = presentCmdBuffers.size(); - vkAllocateCommandBuffers(vkDevice, &allocInfo, presentCmdBuffers.data()); - } - - std::vector> flipTaskChain( - swapchainImages.size()); - - for (auto &chain : flipTaskChain) { - chain = amdgpu::device::TaskChain::Create(); - } - std::printf("Initialization complete\n"); - - uint32_t imageIndex = 0; - bool isImageAcquired = false; - uint32_t gpIndex = -1; - GLFWgamepadstate gpState; - - while (!glfwWindowShouldClose(window)) { - glfwPollEvents(); - - std::size_t pulledCount = - bridgePuller.pullCommands(commandsBuffer, std::size(commandsBuffer)); - - if (gpIndex > GLFW_JOYSTICK_LAST) { - for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) { - if (glfwJoystickIsGamepad(i) == GLFW_TRUE) { - std::printf("Gamepad \"%s\" activated", glfwGetGamepadName(i)); - gpIndex = i; - break; - } - } - } else if (gpIndex <= GLFW_JOYSTICK_LAST) { - if (!glfwJoystickIsGamepad(gpIndex)) { - gpIndex = -1; - } - } - - if (gpIndex <= GLFW_JOYSTICK_LAST) { - if (glfwGetGamepadState(gpIndex, &gpState) == GLFW_TRUE) { - bridge->kbPadState.leftStickX = - gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_X] * 127.5f + 127.5f; - bridge->kbPadState.leftStickY = - gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_Y] * 127.5f + 127.5f; - bridge->kbPadState.rightStickX = - gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_X] * 127.5f + 127.5f; - bridge->kbPadState.rightStickY = - gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_Y] * 127.5f + 127.5f; - bridge->kbPadState.l2 = - (gpState.axes[GLFW_GAMEPAD_AXIS_LEFT_TRIGGER] + 1.0f) * 127.5f; - bridge->kbPadState.r2 = - (gpState.axes[GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER] + 1.0f) * 127.5f; - bridge->kbPadState.buttons = 0; - - if (bridge->kbPadState.l2 == 0xFF) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2; - } - - if (bridge->kbPadState.r2 == 0xFF) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2; - } - - static const uint32_t gpmap[GLFW_GAMEPAD_BUTTON_LAST + 1] = { - [GLFW_GAMEPAD_BUTTON_A] = amdgpu::bridge::kPadBtnCross, - [GLFW_GAMEPAD_BUTTON_B] = amdgpu::bridge::kPadBtnCircle, - [GLFW_GAMEPAD_BUTTON_X] = amdgpu::bridge::kPadBtnSquare, - [GLFW_GAMEPAD_BUTTON_Y] = amdgpu::bridge::kPadBtnTriangle, - [GLFW_GAMEPAD_BUTTON_LEFT_BUMPER] = amdgpu::bridge::kPadBtnL1, - [GLFW_GAMEPAD_BUTTON_RIGHT_BUMPER] = amdgpu::bridge::kPadBtnR1, - [GLFW_GAMEPAD_BUTTON_BACK] = 0, - [GLFW_GAMEPAD_BUTTON_START] = amdgpu::bridge::kPadBtnOptions, - [GLFW_GAMEPAD_BUTTON_GUIDE] = 0, - [GLFW_GAMEPAD_BUTTON_LEFT_THUMB] = amdgpu::bridge::kPadBtnL3, - [GLFW_GAMEPAD_BUTTON_RIGHT_THUMB] = amdgpu::bridge::kPadBtnR3, - [GLFW_GAMEPAD_BUTTON_DPAD_UP] = amdgpu::bridge::kPadBtnUp, - [GLFW_GAMEPAD_BUTTON_DPAD_RIGHT] = amdgpu::bridge::kPadBtnRight, - [GLFW_GAMEPAD_BUTTON_DPAD_DOWN] = amdgpu::bridge::kPadBtnDown, - [GLFW_GAMEPAD_BUTTON_DPAD_LEFT] = amdgpu::bridge::kPadBtnLeft}; - - for (int i = 0; i <= GLFW_GAMEPAD_BUTTON_LAST; ++i) { - if (gpState.buttons[i] == GLFW_PRESS) { - bridge->kbPadState.buttons |= gpmap[i]; - } - } - } - } else { - bridge->kbPadState.leftStickX = 0x80; - bridge->kbPadState.leftStickY = 0x80; - bridge->kbPadState.rightStickX = 0x80; - bridge->kbPadState.rightStickY = 0x80; - bridge->kbPadState.buttons = 0; - - if (glfwGetKey(window, GLFW_KEY_A) == GLFW_PRESS) { - bridge->kbPadState.leftStickX = 0; - } else if (glfwGetKey(window, GLFW_KEY_D) == GLFW_PRESS) { - bridge->kbPadState.leftStickX = 0xff; - } - if (glfwGetKey(window, GLFW_KEY_W) == GLFW_PRESS) { - bridge->kbPadState.leftStickY = 0; - } else if (glfwGetKey(window, GLFW_KEY_S) == GLFW_PRESS) { - bridge->kbPadState.leftStickY = 0xff; - } - - if (glfwGetKey(window, GLFW_KEY_O) == GLFW_PRESS) { - bridge->kbPadState.rightStickX = 0; - } else if (glfwGetKey(window, GLFW_KEY_L) == GLFW_PRESS) { - bridge->kbPadState.rightStickX = 0xff; - } - if (glfwGetKey(window, GLFW_KEY_K) == GLFW_PRESS) { - bridge->kbPadState.rightStickY = 0; - } else if (glfwGetKey(window, GLFW_KEY_SEMICOLON) == GLFW_PRESS) { - bridge->kbPadState.rightStickY = 0xff; - } - - if (glfwGetKey(window, GLFW_KEY_UP) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnUp; - } - if (glfwGetKey(window, GLFW_KEY_DOWN) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnDown; - } - if (glfwGetKey(window, GLFW_KEY_LEFT) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnLeft; - } - if (glfwGetKey(window, GLFW_KEY_RIGHT) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnRight; - } - if (glfwGetKey(window, GLFW_KEY_Z) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnSquare; - } - if (glfwGetKey(window, GLFW_KEY_X) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCross; - } - if (glfwGetKey(window, GLFW_KEY_C) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnCircle; - } - if (glfwGetKey(window, GLFW_KEY_V) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnTriangle; - } - - if (glfwGetKey(window, GLFW_KEY_Q) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL1; - } - if (glfwGetKey(window, GLFW_KEY_E) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL2; - bridge->kbPadState.l2 = 0xff; - } - if (glfwGetKey(window, GLFW_KEY_F) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnL3; - } - if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnPs; - } - if (glfwGetKey(window, GLFW_KEY_I) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR1; - } - if (glfwGetKey(window, GLFW_KEY_P) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR2; - bridge->kbPadState.r2 = 0xff; - } - if (glfwGetKey(window, GLFW_KEY_APOSTROPHE) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnR3; - } - - if (glfwGetKey(window, GLFW_KEY_ENTER) == GLFW_PRESS) { - bridge->kbPadState.buttons |= amdgpu::bridge::kPadBtnOptions; - } - } - - bridge->kbPadState.timestamp = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - - if (pulledCount == 0) { - // std::this_thread::sleep_for( - // std::chrono::milliseconds(1)); // Just for testing, should be - // removed - continue; - } - - for (auto cmd : std::span(commandsBuffer, pulledCount)) { - switch (cmd.id) { - case amdgpu::bridge::CommandId::ProtectMemory: { - auto &process = processInfo[cmd.memoryProt.pid]; - - auto vmSlotIt = process.vmTable.queryArea(cmd.memoryProt.address); - if (vmSlotIt == process.vmTable.end()) { - std::abort(); - } - - auto vmSlot = (*vmSlotIt).payload; - - process.vmTable.map(cmd.memoryProt.address, - cmd.memoryProt.address + cmd.memoryProt.size, - VmMapSlot{ - .memoryType = vmSlot.memoryType, - .prot = static_cast(cmd.memoryProt.prot), - .offset = vmSlot.offset, - .baseAddress = vmSlot.baseAddress, - }); - - if (process.vmId >= 0) { - auto memory = amdgpu::RemoteMemory{process.vmId}; - rx::mem::protect(memory.getPointer(cmd.memoryProt.address), - cmd.memoryProt.size, cmd.memoryProt.prot >> 4); - device.handleProtectMemory(memory, cmd.memoryProt.address, - cmd.memoryProt.size, - cmd.memoryProt.prot); - } - break; - } - case amdgpu::bridge::CommandId::CommandBuffer: { - auto &process = processInfo[cmd.commandBuffer.pid]; - if (process.vmId >= 0) { - device.handleCommandBuffer( - amdgpu::RemoteMemory{process.vmId}, cmd.commandBuffer.queue, - cmd.commandBuffer.address, cmd.commandBuffer.size); - } - break; - } - case amdgpu::bridge::CommandId::Flip: { - auto &process = processInfo[cmd.flip.pid]; - - if (process.vmId >= 0) { - if (!isImageAcquired) { - Verify() << vkAcquireNextImageKHR(vkDevice, swapchain, UINT64_MAX, - presentCompleteSemaphore, - nullptr, &imageIndex); - - vkWaitForFences(vkDevice, 1, &inFlightFences[imageIndex], VK_TRUE, - UINT64_MAX); - vkResetFences(vkDevice, 1, &inFlightFences[imageIndex]); - } - - isImageAcquired = false; - - vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0); - VkCommandBufferBeginInfo beginInfo{}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - vkBeginCommandBuffer(presentCmdBuffers[imageIndex], &beginInfo); - - if (device.handleFlip( - amdgpu::RemoteMemory{process.vmId}, presentQueue, - presentCmdBuffers[imageIndex], - *flipTaskChain[imageIndex].get(), cmd.flip.bufferIndex, - cmd.flip.arg, swapchainImages[imageIndex], swapchainExtent, - presentCompleteSemaphore, renderCompleteSemaphore, - inFlightFences[imageIndex], process.buffers, - process.bufferAttributes)) { - VkPresentInfoKHR presentInfo{ - .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &renderCompleteSemaphore, - .swapchainCount = 1, - .pSwapchains = &swapchain, - .pImageIndices = &imageIndex, - }; - if (vkQueuePresentKHR(presentQueue, &presentInfo) != VK_SUCCESS) { - std::printf("swapchain was invalidated\n"); - createSwapchain(); - } - } else { - isImageAcquired = true; - } - } - break; - } - - case amdgpu::bridge::CommandId::MapProcess: { - mapProcess(cmd.mapProcess.pid, cmd.mapProcess.vmId, - processInfo[cmd.mapProcess.pid]); - break; - } - case amdgpu::bridge::CommandId::UnmapProcess: { - unmapProcess(processInfo[cmd.mapProcess.pid]); - break; - } - - case amdgpu::bridge::CommandId::MapMemory: { - auto &process = processInfo[cmd.mapMemory.pid]; - - process.vmTable.map( - cmd.mapMemory.address, cmd.mapMemory.address + cmd.mapMemory.size, - VmMapSlot{ - .memoryType = static_cast(cmd.mapMemory.memoryType >= 0 - ? cmd.mapMemory.dmemIndex - : -1), - .prot = static_cast(cmd.mapMemory.prot), - .offset = cmd.mapMemory.offset, - .baseAddress = cmd.mapMemory.address, - }); - - if (process.vmId >= 0) { - auto memory = amdgpu::RemoteMemory{process.vmId}; - - int mapFd = process.vmFd; - - if (cmd.mapMemory.memoryType >= 0) { - mapFd = dmemFd[cmd.mapMemory.dmemIndex]; - } - - auto mmapResult = - ::mmap(memory.getPointer(cmd.mapMemory.address), - cmd.mapMemory.size, cmd.mapMemory.prot >> 4, - MAP_FIXED | MAP_SHARED, mapFd, cmd.mapMemory.offset); - - if (mmapResult == MAP_FAILED) { - std::printf( - "failed to map process %x memory, address %lx-%lx, type %x\n", - (int)cmd.mapMemory.pid, cmd.mapMemory.address, - cmd.mapMemory.address + cmd.mapMemory.size, - cmd.mapMemory.memoryType); - std::abort(); - } - - device.handleProtectMemory(memory, cmd.mapMemory.address, - cmd.mapMemory.size, cmd.mapMemory.prot); - } - break; - } - - case amdgpu::bridge::CommandId::RegisterBuffer: { - auto &process = processInfo[cmd.buffer.pid]; - - if (cmd.buffer.attrId >= 10 || cmd.buffer.index >= 10) { - std::abort(); - } - - process.buffers[cmd.buffer.index] = cmd.buffer; - break; - } - - case amdgpu::bridge::CommandId::RegisterBufferAttribute: { - auto &process = processInfo[cmd.bufferAttribute.pid]; - if (cmd.bufferAttribute.attrId >= 10) { - std::abort(); - } - - process.bufferAttributes[cmd.bufferAttribute.attrId] = - cmd.bufferAttribute; - break; - } - - default: - util::unreachable("Unexpected command id %u\n", (unsigned)cmd.id); - } - } - } - - if (bridge->pusherPid > 0) { - kill(bridge->pusherPid, SIGINT); - } - - for (auto fence : inFlightFences) { - vkDestroyFence(vkDevice, fence, nullptr); - } - - vkDestroySemaphore(vkDevice, presentCompleteSemaphore, nullptr); - vkDestroySemaphore(vkDevice, renderCompleteSemaphore, nullptr); - vkDestroyCommandPool(vkDevice, commandPool, nullptr); - } - - vkDestroySwapchainKHR(vkDevice, swapchain, nullptr); - vkDestroyDevice(vkDevice, nullptr); - vkDestroySurfaceKHR(vkInstance, vkSurface, nullptr); - vkDestroyInstance(vkInstance, nullptr); - - glfwDestroyWindow(window); - - amdgpu::bridge::destroyShmCommandBuffer(bridge); - amdgpu::bridge::unlinkShm(cmdBridgeName); - return 0; -}