mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-11-23 07:19:50 +00:00
ac,radv: implement the cs_regalloc_hang HW bug workaround
Might fix spurious failures on GFX6 and some GFX7 chips.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11675>
This commit is contained in:
parent
c905e74842
commit
29f264f258
@ -906,6 +906,18 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
info->has_vgt_flush_ngg_legacy_bug = info->chip_class == GFX10 ||
|
||||
info->family == CHIP_SIENNA_CICHLID;
|
||||
|
||||
/* HW bug workaround when CS threadgroups > 256 threads and async compute
|
||||
* isn't used, i.e. only one compute job can run at a time. If async
|
||||
* compute is possible, the threadgroup size must be limited to 256 threads
|
||||
* on all queues to avoid the bug.
|
||||
* Only GFX6 and certain GFX7 chips are affected.
|
||||
*
|
||||
* FIXME: RADV doesn't limit the number of threads for async compute.
|
||||
*/
|
||||
info->has_cs_regalloc_hang_bug = info->chip_class == GFX6 ||
|
||||
info->family == CHIP_BONAIRE ||
|
||||
info->family == CHIP_KABINI;
|
||||
|
||||
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
|
||||
* feature version wasn't bumped.
|
||||
*/
|
||||
|
@ -79,6 +79,7 @@ struct radeon_info {
|
||||
bool has_image_load_dcc_bug;
|
||||
bool has_two_planes_iterate256_bug;
|
||||
bool has_vgt_flush_ngg_legacy_bug;
|
||||
bool has_cs_regalloc_hang_bug;
|
||||
bool has_32bit_predication;
|
||||
bool has_3d_cube_border_color_mipmap;
|
||||
|
||||
|
@ -6134,6 +6134,12 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
|
||||
{
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
|
||||
bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline;
|
||||
bool cs_regalloc_hang = cmd_buffer->device->physical_device->rad_info.has_cs_regalloc_hang_bug &&
|
||||
info->blocks[0] * info->blocks[1] * info->blocks[2] > 256;
|
||||
|
||||
if (cs_regalloc_hang)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
|
||||
|
||||
if (cmd_buffer->state.flush_bits &
|
||||
(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
|
||||
@ -6190,6 +6196,9 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
|
||||
: VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
}
|
||||
|
||||
if (cs_regalloc_hang)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
|
||||
|
||||
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
|
||||
}
|
||||
|
||||
|
@ -890,18 +890,12 @@ static bool si_check_needs_implicit_sync(struct si_context *sctx)
|
||||
static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_screen *sscreen = sctx->screen;
|
||||
struct si_compute *program = sctx->cs_shader_state.program;
|
||||
const amd_kernel_code_t *code_object = si_compute_get_code_object(program, info->pc);
|
||||
int i;
|
||||
/* HW bug workaround when CS threadgroups > 256 threads and async
|
||||
* compute isn't used, i.e. only one compute job can run at a time.
|
||||
* If async compute is possible, the threadgroup size must be limited
|
||||
* to 256 threads on all queues to avoid the bug.
|
||||
* Only GFX6 and certain GFX7 chips are affected.
|
||||
*/
|
||||
bool cs_regalloc_hang =
|
||||
(sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) &&
|
||||
info->block[0] * info->block[1] * info->block[2] > 256;
|
||||
bool cs_regalloc_hang = sscreen->info.has_cs_regalloc_hang_bug &&
|
||||
info->block[0] * info->block[1] * info->block[2] > 256;
|
||||
|
||||
if (cs_regalloc_hang)
|
||||
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
|
||||
|
Loading…
Reference in New Issue
Block a user