mirror of
https://github.com/xenia-project/xenia.git
synced 2024-11-23 11:39:45 +00:00
[Vulkan] 32-bit index fetch without fullDrawIndexUint32
This commit is contained in:
parent
37579d3bf0
commit
77e85ecaa4
@ -691,6 +691,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
|
||||
// Writing to the trace irrespective of the cache lookup result
|
||||
// because cache behavior depends on runtime configuration and
|
||||
// state.
|
||||
// Example of 16-bit reset index replacement: 415607D4.
|
||||
trace_writer_.WriteMemoryRead(guest_index_base,
|
||||
guest_index_buffer_needed_bytes);
|
||||
// Not specifying the primitive type in the cache key because not
|
||||
|
@ -31,6 +31,7 @@ SpirvShaderTranslator::Features::Features(bool all)
|
||||
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
|
||||
clip_distance(all),
|
||||
cull_distance(all),
|
||||
full_draw_index_uint32(all),
|
||||
image_view_format_swizzle(all),
|
||||
signed_zero_inf_nan_preserve_float32(all),
|
||||
denorm_flush_to_zero_float32(all) {}
|
||||
@ -40,7 +41,8 @@ SpirvShaderTranslator::Features::Features(
|
||||
: max_storage_buffer_range(
|
||||
provider.device_properties().limits.maxStorageBufferRange),
|
||||
clip_distance(provider.device_features().shaderClipDistance),
|
||||
cull_distance(provider.device_features().shaderCullDistance) {
|
||||
cull_distance(provider.device_features().shaderCullDistance),
|
||||
full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) {
|
||||
uint32_t device_version = provider.device_properties().apiVersion;
|
||||
const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions =
|
||||
provider.device_extensions();
|
||||
@ -221,6 +223,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||
sizeof(uint32_t) * 4);
|
||||
const SystemConstant system_constants[] = {
|
||||
{"flags", offsetof(SystemConstants, flags), type_uint_},
|
||||
{"vertex_index_load_address",
|
||||
offsetof(SystemConstants, vertex_index_load_address), type_uint_},
|
||||
{"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian),
|
||||
type_uint_},
|
||||
{"vertex_base_index", offsetof(SystemConstants, vertex_base_index),
|
||||
@ -1129,18 +1133,73 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
||||
if (register_count()) {
|
||||
// TODO(Triang3l): Barycentric coordinates and patch index.
|
||||
if (IsSpirvVertexShader()) {
|
||||
// TODO(Triang3l): Fetch the vertex index from the shared memory when
|
||||
// fullDrawIndexUint32 isn't available and the index is 32-bit and needs
|
||||
// endian swap.
|
||||
// TODO(Triang3l): Close line loop primitive.
|
||||
// Load the unswapped index as uint for swapping.
|
||||
// Load the unswapped index as uint for swapping, or for indirect loading
|
||||
// if needed.
|
||||
spv::Id vertex_index = builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_uint_,
|
||||
builder_->createLoad(input_vertex_index_, spv::NoPrecision));
|
||||
if (!features_.full_draw_index_uint32) {
|
||||
// Check if the full 32-bit index needs to be loaded indirectly.
|
||||
spv::Id load_vertex_index = builder_->createBinOp(
|
||||
spv::OpINotEqual, type_bool_,
|
||||
builder_->createBinOp(
|
||||
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
|
||||
builder_->makeUintConstant(
|
||||
static_cast<unsigned int>(kSysFlag_VertexIndexLoad))),
|
||||
const_uint_0_);
|
||||
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint();
|
||||
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock();
|
||||
spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock();
|
||||
SpirvCreateSelectionMerge(block_load_vertex_index_merge.getId(),
|
||||
spv::SelectionControlDontFlattenMask);
|
||||
builder_->createConditionalBranch(load_vertex_index,
|
||||
&block_load_vertex_index_start,
|
||||
&block_load_vertex_index_merge);
|
||||
builder_->setBuildPoint(&block_load_vertex_index_start);
|
||||
// Load the 32-bit index.
|
||||
// TODO(Triang3l): Bounds checking.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
|
||||
spv::Id loaded_vertex_index =
|
||||
LoadUint32FromSharedMemory(builder_->createUnaryOp(
|
||||
spv::OpBitcast, type_int_,
|
||||
builder_->createBinOp(
|
||||
spv::OpIAdd, type_uint_,
|
||||
builder_->createBinOp(
|
||||
spv::OpShiftRightLogical, type_uint_,
|
||||
builder_->createLoad(
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
uniform_system_constants_, id_vector_temp_),
|
||||
spv::NoPrecision),
|
||||
builder_->makeUintConstant(2)),
|
||||
vertex_index)));
|
||||
// Get the actual build point for phi.
|
||||
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
|
||||
builder_->createBranch(&block_load_vertex_index_merge);
|
||||
// Select between the loaded index and the original index from Vulkan.
|
||||
builder_->setBuildPoint(&block_load_vertex_index_merge);
|
||||
{
|
||||
std::unique_ptr<spv::Instruction> loaded_vertex_index_phi_op =
|
||||
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
|
||||
type_uint_, spv::OpPhi);
|
||||
loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index);
|
||||
loaded_vertex_index_phi_op->addIdOperand(
|
||||
block_load_vertex_index_end.getId());
|
||||
loaded_vertex_index_phi_op->addIdOperand(vertex_index);
|
||||
loaded_vertex_index_phi_op->addIdOperand(
|
||||
block_load_vertex_index_pre.getId());
|
||||
vertex_index = loaded_vertex_index_phi_op->getResultId();
|
||||
builder_->getBuildPoint()->addInstruction(
|
||||
std::move(loaded_vertex_index_phi_op));
|
||||
}
|
||||
}
|
||||
// Endian-swap the index and convert to int.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantIndexVertexIndexEndian));
|
||||
builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
|
||||
spv::Id vertex_index_endian =
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassUniform,
|
||||
@ -1152,7 +1211,7 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
||||
// Add the base to the index.
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(
|
||||
builder_->makeIntConstant(kSystemConstantIndexVertexBaseIndex));
|
||||
builder_->makeIntConstant(kSystemConstantVertexBaseIndex));
|
||||
vertex_index = builder_->createBinOp(
|
||||
spv::OpIAdd, type_int_, vertex_index,
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
|
@ -83,6 +83,9 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
kSysFlag_VertexIndexLoad_Shift,
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
|
||||
kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW_Shift,
|
||||
kSysFlag_WNotReciprocal_Shift,
|
||||
@ -98,6 +101,22 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
|
||||
kSysFlag_Count,
|
||||
|
||||
// For HostVertexShaderType kVertex, if fullDrawIndexUint32 is not
|
||||
// supported (ignored otherwise), whether to fetch the index manually
|
||||
// (32-bit only - 16-bit indices are always fetched via the Vulkan index
|
||||
// buffer).
|
||||
kSysFlag_VertexIndexLoad = 1u << kSysFlag_VertexIndexLoad_Shift,
|
||||
// For HostVertexShaderTypes kMemexportCompute, kPointListAsTriangleStrip,
|
||||
// kRectangleListAsTriangleStrip, whether the vertex index needs to be
|
||||
// loaded from the index buffer (rather than using autogenerated indices),
|
||||
// and whether it's 32-bit. This is separate from kSysFlag_VertexIndexLoad
|
||||
// because the same system constants may be used for the memexporting
|
||||
// compute shader and the vertex shader for the same draw, but
|
||||
// kSysFlag_VertexIndexLoad may not be needed.
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad =
|
||||
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit =
|
||||
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
|
||||
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
|
||||
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
|
||||
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
||||
@ -116,11 +135,14 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||
// - SystemConstantIndex enum.
|
||||
// - Structure members in BeginTranslation.
|
||||
//
|
||||
// Using the std140 layout - vec2 must be aligned to 8 bytes, vec3 and vec4 to
|
||||
// 16 bytes.
|
||||
struct SystemConstants {
|
||||
uint32_t flags;
|
||||
uint32_t vertex_index_load_address;
|
||||
xenos::Endian vertex_index_endian;
|
||||
int32_t vertex_base_index;
|
||||
uint32_t padding_vertex_base_index;
|
||||
|
||||
float ndc_scale[3];
|
||||
uint32_t padding_ndc_scale;
|
||||
@ -216,6 +238,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
uint32_t max_storage_buffer_range;
|
||||
bool clip_distance;
|
||||
bool cull_distance;
|
||||
bool full_draw_index_uint32;
|
||||
bool image_view_format_swizzle;
|
||||
bool signed_zero_inf_nan_preserve_float32;
|
||||
bool denorm_flush_to_zero_float32;
|
||||
@ -576,8 +599,9 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||
|
||||
enum SystemConstantIndex : unsigned int {
|
||||
kSystemConstantFlags,
|
||||
kSystemConstantIndexVertexIndexEndian,
|
||||
kSystemConstantIndexVertexBaseIndex,
|
||||
kSystemConstantVertexIndexLoadAddress,
|
||||
kSystemConstantVertexIndexEndian,
|
||||
kSystemConstantVertexBaseIndex,
|
||||
kSystemConstantNdcScale,
|
||||
kSystemConstantNdcOffset,
|
||||
kSystemConstantTextureSwizzledSigns,
|
||||
|
@ -2383,9 +2383,10 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||
normalized_depth_control);
|
||||
|
||||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(primitive_polygonal,
|
||||
primitive_processing_result.host_index_endian,
|
||||
viewport_info, used_texture_mask);
|
||||
bool vertex_shader_index_load;
|
||||
UpdateSystemConstantValues(primitive_polygonal, primitive_processing_result,
|
||||
viewport_info, used_texture_mask,
|
||||
vertex_shader_index_load);
|
||||
|
||||
// Update uniform buffers and descriptor sets after binding the pipeline with
|
||||
// the new layout.
|
||||
@ -2451,7 +2452,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||
|
||||
// Draw.
|
||||
if (primitive_processing_result.index_buffer_type ==
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kNone) {
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kNone ||
|
||||
vertex_shader_index_load) {
|
||||
deferred_command_buffer_.CmdVkDraw(
|
||||
primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
|
||||
} else {
|
||||
@ -3338,8 +3340,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||
}
|
||||
|
||||
void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||
bool primitive_polygonal, xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask) {
|
||||
bool primitive_polygonal,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
|
||||
bool& vertex_shader_index_load_out) {
|
||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||
@ -3362,6 +3366,52 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||
|
||||
// Flags.
|
||||
uint32_t flags = 0;
|
||||
// Vertex index shader loading.
|
||||
bool vertex_shader_index_load = false;
|
||||
// Only for ProcessedIndexBufferType kGuest since kHostConverted indices may
|
||||
// not be loaded into the GPU memory (only read on the CPU), though
|
||||
// kHostConverted must never be used for point lists and rectangle lists
|
||||
// without geometry shaders anyway. For regular 32-bit index fetching without
|
||||
// fullDrawIndexUint32, kHostConverted indices are already byte-swapped and
|
||||
// truncated to 24 bits, so indirect fetch is not needed.
|
||||
if (primitive_processing_result.index_buffer_type ==
|
||||
PrimitiveProcessor::ProcessedIndexBufferType::kGuest) {
|
||||
switch (primitive_processing_result.host_vertex_shader_type) {
|
||||
case Shader::HostVertexShaderType::kVertex: {
|
||||
// For guest (usually big-endian) 32-bit indices when they're not
|
||||
// supported by the device.
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
|
||||
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
|
||||
const VkPhysicalDeviceFeatures& device_features =
|
||||
provider.device_features();
|
||||
if (!device_features.fullDrawIndexUint32) {
|
||||
vertex_shader_index_load = true;
|
||||
flags |= SpirvShaderTranslator::kSysFlag_VertexIndexLoad;
|
||||
}
|
||||
}
|
||||
} break;
|
||||
// kMemexportCompute never comes out of the PrimitiveProcessor, as
|
||||
// memexport compute shaders are executed alongside their vertex
|
||||
// counterparts, since they may still result in drawing.
|
||||
case Shader::HostVertexShaderType::kPointListAsTriangleStrip:
|
||||
case Shader::HostVertexShaderType::kRectangleListAsTriangleStrip: {
|
||||
// Always loading the guest index buffer indirectly if it's used, as
|
||||
// host indexing contains a part needed specifically for the host for
|
||||
// the construction of the primitive - host vertices don't map 1:1 to
|
||||
// guest ones.
|
||||
vertex_shader_index_load = true;
|
||||
flags |=
|
||||
SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad;
|
||||
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
|
||||
flags |= SpirvShaderTranslator ::
|
||||
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit;
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
vertex_shader_index_load_out = vertex_shader_index_load;
|
||||
// W0 division control.
|
||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
||||
@ -3404,9 +3454,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||
dirty |= system_constants_.flags != flags;
|
||||
system_constants_.flags = flags;
|
||||
|
||||
// Index buffer address for loading in the shaders.
|
||||
if (flags &
|
||||
(SpirvShaderTranslator::kSysFlag_VertexIndexLoad |
|
||||
SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad)) {
|
||||
dirty |= system_constants_.vertex_index_load_address !=
|
||||
primitive_processing_result.guest_index_base;
|
||||
system_constants_.vertex_index_load_address =
|
||||
primitive_processing_result.guest_index_base;
|
||||
}
|
||||
|
||||
// Index or tessellation edge factor buffer endianness.
|
||||
dirty |= system_constants_.vertex_index_endian != index_endian;
|
||||
system_constants_.vertex_index_endian = index_endian;
|
||||
dirty |= system_constants_.vertex_index_endian !=
|
||||
primitive_processing_result.host_index_endian;
|
||||
system_constants_.vertex_index_endian =
|
||||
primitive_processing_result.host_index_endian;
|
||||
|
||||
// Vertex index offset.
|
||||
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
||||
|
@ -433,10 +433,11 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
|
||||
bool primitive_polygonal,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control);
|
||||
void UpdateSystemConstantValues(bool primitive_polygonal,
|
||||
xenos::Endian index_endian,
|
||||
const draw_util::ViewportInfo& viewport_info,
|
||||
uint32_t used_texture_mask);
|
||||
void UpdateSystemConstantValues(
|
||||
bool primitive_polygonal,
|
||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
|
||||
bool& vertex_shader_index_load_out);
|
||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||
const VulkanShader* pixel_shader);
|
||||
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet
|
||||
|
@ -208,6 +208,7 @@ enum class Endian128 : uint32_t {
|
||||
|
||||
// Size of each index in a guest index buffer; this is the type the draw
// initiator's index_size field is compared against.
enum class IndexFormat : uint32_t {
|
||||
// 16-bit indices (presumably, going by the enumerator name).
kInt16,
|
||||
// 32-bit indices.
// Not very common, but used for some world draws in 545407E0.
|
||||
kInt32,
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user