mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2025-02-25 04:43:09 +00:00
radeonsi: support ARB_compute_variable_group_size
Not sure if it's possible to avoid programming the block size twice (once for the userdata and once for the dispatch).
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
014bd4acb8
commit
77c81164bc
@ -279,7 +279,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
|
||||
|
||||
GL_ARB_bindless_texture started (airlied)
|
||||
GL_ARB_cl_event not started
|
||||
GL_ARB_compute_variable_group_size DONE (nvc0)
|
||||
GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
|
||||
GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
|
||||
GL_ARB_fragment_shader_interlock not started
|
||||
GL_ARB_gl_spirv not started
|
||||
|
@ -49,7 +49,7 @@ Note: some of the new features are only available with certain drivers.
|
||||
<li>GL_ARB_ES3_1_compatibility on i965</li>
|
||||
<li>GL_ARB_ES3_2_compatibility on i965/gen8+</li>
|
||||
<li>GL_ARB_clear_texture on r600, radeonsi</li>
|
||||
<li>GL_ARB_compute_variable_group_size on nvc0</li>
|
||||
<li>GL_ARB_compute_variable_group_size on nvc0, radeonsi</li>
|
||||
<li>GL_ARB_cull_distance on radeonsi</li>
|
||||
<li>GL_ARB_enhanced_layouts on i965</li>
|
||||
<li>GL_ARB_indirect_parameters on radeonsi</li>
|
||||
|
@ -1037,7 +1037,15 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
}
|
||||
return sizeof(uint32_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||
return 0;
|
||||
if (ret) {
|
||||
uint64_t *max_variable_threads_per_block = ret;
|
||||
if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI)
|
||||
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
||||
else
|
||||
*max_variable_threads_per_block = 0;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
}
|
||||
|
||||
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
|
||||
|
@ -106,6 +106,8 @@
|
||||
#define R600_MAP_BUFFER_ALIGNMENT 64
|
||||
#define R600_MAX_VIEWPORTS 16
|
||||
|
||||
#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
|
||||
|
||||
enum r600_coherency {
|
||||
R600_COHERENCY_NONE, /* no cache flushes needed */
|
||||
R600_COHERENCY_SHADER,
|
||||
|
@ -601,11 +601,19 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
} else {
|
||||
struct si_compute *program = sctx->cs_shader_state.program;
|
||||
bool variable_group_size =
|
||||
program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
|
||||
radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3);
|
||||
radeon_emit(cs, info->grid[0]);
|
||||
radeon_emit(cs, info->grid[1]);
|
||||
radeon_emit(cs, info->grid[2]);
|
||||
if (variable_group_size) {
|
||||
radeon_emit(cs, info->block[0]);
|
||||
radeon_emit(cs, info->block[1]);
|
||||
radeon_emit(cs, info->block[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1770,16 +1770,21 @@ static void declare_system_value(
|
||||
LLVMValueRef values[3];
|
||||
unsigned i;
|
||||
unsigned *properties = ctx->shader->selector->info.properties;
|
||||
unsigned sizes[3] = {
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
|
||||
};
|
||||
|
||||
for (i = 0; i < 3; ++i)
|
||||
values[i] = lp_build_const_int32(gallivm, sizes[i]);
|
||||
if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
|
||||
unsigned sizes[3] = {
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
|
||||
};
|
||||
|
||||
value = lp_build_gather_values(gallivm, values, 3);
|
||||
for (i = 0; i < 3; ++i)
|
||||
values[i] = lp_build_const_int32(gallivm, sizes[i]);
|
||||
|
||||
value = lp_build_gather_values(gallivm, values, 3);
|
||||
} else {
|
||||
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_BLOCK_SIZE);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@ -5680,6 +5685,7 @@ static void create_function(struct si_shader_context *ctx)
|
||||
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
params[SI_PARAM_GRID_SIZE] = v3i32;
|
||||
params[SI_PARAM_BLOCK_SIZE] = v3i32;
|
||||
params[SI_PARAM_BLOCK_ID] = v3i32;
|
||||
last_sgpr = SI_PARAM_BLOCK_ID;
|
||||
|
||||
@ -5716,7 +5722,12 @@ static void create_function(struct si_shader_context *ctx)
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
|
||||
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
|
||||
|
||||
assert(max_work_group_size);
|
||||
if (!max_work_group_size) {
|
||||
/* This is a variable group size compute shader,
|
||||
* compile it for the maximum possible group size.
|
||||
*/
|
||||
max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
||||
}
|
||||
|
||||
radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
|
||||
"amdgpu-max-work-group-size",
|
||||
@ -6653,11 +6664,16 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||
unsigned max_vgprs = 256;
|
||||
unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
|
||||
unsigned max_sgprs_per_wave = 128;
|
||||
unsigned min_waves_per_cu =
|
||||
DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
|
||||
props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
|
||||
props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
|
||||
wave_size);
|
||||
unsigned max_block_threads;
|
||||
|
||||
if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
|
||||
max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
|
||||
props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
|
||||
props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
|
||||
else
|
||||
max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
||||
|
||||
unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
|
||||
unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
|
||||
|
||||
max_vgprs = max_vgprs / min_waves_per_simd;
|
||||
|
@ -129,7 +129,8 @@ enum {
|
||||
|
||||
/* CS only */
|
||||
SI_SGPR_GRID_SIZE = SI_NUM_RESOURCE_SGPRS,
|
||||
SI_CS_NUM_USER_SGPR = SI_SGPR_GRID_SIZE + 3
|
||||
SI_SGPR_BLOCK_SIZE = SI_SGPR_GRID_SIZE + 3,
|
||||
SI_CS_NUM_USER_SGPR = SI_SGPR_BLOCK_SIZE + 3
|
||||
};
|
||||
|
||||
/* LLVM function parameter indices */
|
||||
@ -219,6 +220,7 @@ enum {
|
||||
|
||||
/* CS only parameters */
|
||||
SI_PARAM_GRID_SIZE = SI_NUM_RESOURCE_PARAMS,
|
||||
SI_PARAM_BLOCK_SIZE,
|
||||
SI_PARAM_BLOCK_ID,
|
||||
SI_PARAM_THREAD_ID,
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user