mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-12-04 05:33:41 +00:00
intel/dev: fix subslice/eu total computations with some fused configurations
When a device has its first slice/subslice fused off, we can't use the number of slices/subslices to iterate the mask array.

v2: Fix spelling (Marcin)
    Use size_t for iterator (Marcin)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reported-by: Matt Roper <matthew.d.roper@intel.com>
Cc: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5601
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10015>
This commit is contained in:
parent
e10c641f00
commit
a543a94404
@ -572,10 +572,7 @@ iris_get_compute_param(struct pipe_screen *pscreen,
|
||||
RET((uint32_t []) { 400 }); /* TODO */
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: {
|
||||
unsigned total_num_subslices = 0;
|
||||
for (unsigned i = 0; i < devinfo->num_slices; i++)
|
||||
total_num_subslices += devinfo->num_subslices[i];
|
||||
RET((uint32_t []) { total_num_subslices });
|
||||
RET((uint32_t []) { intel_device_info_subslice_total(devinfo) });
|
||||
}
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
|
||||
|
@ -79,18 +79,18 @@ main(int argc, char *argv[])
|
||||
|
||||
const char *subslice_name = devinfo.ver >= 12 ? "dualsubslice" : "subslice";
|
||||
uint32_t n_s = 0, n_ss = 0, n_eus = 0;
|
||||
for (unsigned s = 0; s < devinfo.num_slices; s++) {
|
||||
for (unsigned s = 0; s < devinfo.max_slices; s++) {
|
||||
n_s += (devinfo.slice_masks & (1u << s)) ? 1 : 0;
|
||||
for (unsigned ss = 0; ss < devinfo.num_subslices[s]; ss++) {
|
||||
for (unsigned ss = 0; ss < devinfo.max_subslices_per_slice; ss++) {
|
||||
fprintf(stdout, " slice%u.%s%u: ", s, subslice_name, ss);
|
||||
if (intel_device_info_subslice_available(&devinfo, s, ss)) {
|
||||
n_ss++;
|
||||
for (unsigned eu = 0; eu < devinfo.num_eu_per_subslice; eu++) {
|
||||
for (unsigned eu = 0; eu < devinfo.max_eu_per_subslice; eu++) {
|
||||
n_eus += intel_device_info_eu_available(&devinfo, s, ss, eu) ? 1 : 0;
|
||||
fprintf(stdout, "%s", intel_device_info_eu_available(&devinfo, s, ss, eu) ? "1" : "0");
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "fused");
|
||||
fprintf(stdout, "fused");
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
|
@ -1050,6 +1050,9 @@ update_from_topology(struct intel_device_info *devinfo,
|
||||
assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
|
||||
memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
|
||||
devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
|
||||
devinfo->max_slices = topology->max_slices;
|
||||
devinfo->max_subslices_per_slice = topology->max_subslices;
|
||||
devinfo->max_eu_per_subslice = topology->max_eus_per_subslice;
|
||||
|
||||
uint32_t subslice_mask_len =
|
||||
topology->max_slices * topology->subslice_stride;
|
||||
@ -1691,7 +1694,7 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
||||
devinfo->has_tiling_uapi = has_get_tiling(fd);
|
||||
|
||||
devinfo->subslice_total = 0;
|
||||
for (uint32_t i = 0; i < devinfo->num_slices; i++)
|
||||
for (uint32_t i = 0; i < devinfo->max_slices; i++)
|
||||
devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]);
|
||||
|
||||
/* Gfx7 and older do not support EU/Subslice info */
|
||||
|
@ -137,11 +137,24 @@ struct intel_device_info
|
||||
*/
|
||||
unsigned num_slices;
|
||||
|
||||
/**
|
||||
* Maximum number of slices present on this device (can be more than
|
||||
* num_slices if some slices are fused).
|
||||
*/
|
||||
unsigned max_slices;
|
||||
|
||||
/**
|
||||
* Number of subslices for each slice (used to be uniform until CNL).
|
||||
*/
|
||||
unsigned num_subslices[INTEL_DEVICE_MAX_SUBSLICES];
|
||||
|
||||
/**
|
||||
* Maximum number of subslices per slice present on this device (can be
|
||||
* more than the maximum value in the num_subslices[] array if some
|
||||
* subslices are fused).
|
||||
*/
|
||||
unsigned max_subslices_per_slice;
|
||||
|
||||
/**
|
||||
* Number of subslices on each pixel pipe (ICL).
|
||||
*/
|
||||
@ -154,6 +167,12 @@ struct intel_device_info
|
||||
*/
|
||||
unsigned num_eu_per_subslice;
|
||||
|
||||
/**
|
||||
* Maximum number of EUs per subslice (can be more than num_eu_per_subslice
|
||||
* if some EUs are fused off).
|
||||
*/
|
||||
unsigned max_eu_per_subslice;
|
||||
|
||||
/**
|
||||
* Number of threads per eu, varies between 4 and 8 between generations.
|
||||
*/
|
||||
@ -356,6 +375,18 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo,
|
||||
return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
|
||||
}
|
||||
|
||||
/**
 * Count the subslices available (i.e. not fused off) on the device.
 *
 * The result is the number of bits set across every entry of
 * devinfo->subslice_masks.
 *
 * @param devinfo  Device description whose subslice_masks array is read.
 * @return         Sum of the population counts of all subslice mask entries.
 */
static inline uint32_t
|
||||
intel_device_info_subslice_total(const struct intel_device_info *devinfo)
|
||||
{
|
||||
uint32_t total = 0;
|
||||
|
||||
/* Walk the whole mask array rather than stopping at the number of enabled
 * slices: when the first slice/subslice is fused off, the enabled entries
 * are not the lowest-indexed ones, so a count-bounded loop would miss them.
 */
for (size_t i = 0; i < ARRAY_SIZE(devinfo->subslice_masks); i++) {
|
||||
total += __builtin_popcount(devinfo->subslice_masks[i]);
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
intel_device_info_eu_total(const struct intel_device_info *devinfo)
|
||||
{
|
||||
|
@ -31,6 +31,39 @@ main(int argc, char *argv[])
|
||||
assert(devinfo.cs_prefetch_size > 0);
|
||||
|
||||
assert(devinfo.ver < 7 || devinfo.max_constant_urb_size_kb > 0);
|
||||
|
||||
assert(devinfo.num_slices <= ARRAY_SIZE(devinfo.subslice_masks));
|
||||
|
||||
assert(devinfo.num_slices <= devinfo.max_slices);
|
||||
assert(intel_device_info_subslice_total(&devinfo) <=
|
||||
(devinfo.max_slices * devinfo.max_subslices_per_slice));
|
||||
|
||||
for (uint32_t s = 0; s < ARRAY_SIZE(devinfo.num_subslices); s++)
|
||||
assert(devinfo.num_subslices[s] <= devinfo.max_subslices_per_slice);
|
||||
|
||||
assert(__builtin_popcount(devinfo.slice_masks) <= devinfo.max_slices);
|
||||
|
||||
uint32_t total_subslices = 0;
|
||||
for (size_t i = 0; i < ARRAY_SIZE(devinfo.subslice_masks); i++)
|
||||
total_subslices += __builtin_popcount(devinfo.subslice_masks[i]);
|
||||
assert(total_subslices <=
|
||||
(devinfo.max_slices * devinfo.max_subslices_per_slice));
|
||||
|
||||
assert(intel_device_info_eu_total(&devinfo) > 0);
|
||||
assert(intel_device_info_subslice_total(&devinfo) > 0);
|
||||
|
||||
total_subslices = 0;
|
||||
for (uint32_t s = 0; s < devinfo.max_slices; s++)
|
||||
for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++)
|
||||
total_subslices += intel_device_info_subslice_available(&devinfo, s, ss);
|
||||
assert(total_subslices == intel_device_info_subslice_total(&devinfo));
|
||||
|
||||
uint32_t total_eus = 0;
|
||||
for (uint32_t s = 0; s < devinfo.max_slices; s++)
|
||||
for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++)
|
||||
for (uint32_t eu = 0; eu < devinfo.max_eu_per_subslice; eu++)
|
||||
total_eus += intel_device_info_eu_available(&devinfo, s, ss, eu);
|
||||
assert(total_eus == intel_device_info_eu_total(&devinfo));
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user