mirror of
https://gitee.com/openharmony/third_party_mesa3d
synced 2024-12-04 13:54:48 +00:00
ac/rgp: add support for dumping SPM data
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13704>
This commit is contained in:
parent
e928f475cc
commit
3e7bac80ce
@ -29,6 +29,7 @@
|
||||
#include "util/u_process.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include "ac_spm.h"
|
||||
#include "ac_sqtt.h"
|
||||
#include "ac_gpu_info.h"
|
||||
#ifdef _WIN32
|
||||
@ -892,8 +893,118 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* SQTT SPM DB info.
|
||||
*/
|
||||
struct sqtt_spm_counter_info {
|
||||
enum ac_pc_gpu_block block;
|
||||
uint32_t instance;
|
||||
uint32_t data_offset; /* offset of counter from the beginning of the chunk */
|
||||
uint32_t event_index; /* index of counter within the block */
|
||||
};
|
||||
|
||||
struct sqtt_file_chunk_spm_db {
|
||||
struct sqtt_file_chunk_header header;
|
||||
uint32_t flags;
|
||||
uint32_t num_timestamps;
|
||||
uint32_t num_spm_counter_info;
|
||||
uint32_t sample_interval;
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
|
||||
"sqtt_file_chunk_spm_db doesn't match RGP spec");
|
||||
|
||||
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
|
||||
struct sqtt_file_chunk_spm_db *chunk,
|
||||
uint32_t num_samples,
|
||||
uint32_t chunk_size)
|
||||
{
|
||||
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
|
||||
chunk->header.chunk_id.index = 0;
|
||||
chunk->header.major_version = 1;
|
||||
chunk->header.minor_version = 3;
|
||||
chunk->header.size_in_bytes = chunk_size;
|
||||
|
||||
chunk->flags = 0;
|
||||
chunk->num_timestamps = num_samples;
|
||||
chunk->num_spm_counter_info = spm_trace->num_counters;
|
||||
chunk->sample_interval = spm_trace->sample_interval;
|
||||
}
|
||||
|
||||
static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
|
||||
size_t file_offset,
|
||||
FILE *output)
|
||||
{
|
||||
uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
|
||||
uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
|
||||
uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
|
||||
struct sqtt_file_chunk_spm_db spm_db;
|
||||
size_t file_spm_db_offset = file_offset;
|
||||
|
||||
fseek(output, sizeof(struct sqtt_file_chunk_spm_db), SEEK_CUR);
|
||||
file_offset += sizeof(struct sqtt_file_chunk_spm_db);
|
||||
|
||||
/* Skip the reserved 32 bytes of data at beginning. */
|
||||
spm_data_ptr += 32;
|
||||
|
||||
/* SPM timestamps. */
|
||||
uint32_t sample_size_in_qwords = sample_size_in_bytes / sizeof(uint64_t);
|
||||
uint64_t *timestamp_ptr = (uint64_t *)spm_data_ptr;
|
||||
|
||||
for (uint32_t s = 0; s < num_samples; s++) {
|
||||
uint64_t index = s * sample_size_in_qwords;
|
||||
uint64_t timestamp = timestamp_ptr[index];
|
||||
|
||||
file_offset += sizeof(timestamp);
|
||||
fwrite(×tamp, sizeof(timestamp), 1, output);
|
||||
}
|
||||
|
||||
/* SPM counter info. */
|
||||
uint64_t counter_values_size = num_samples * sizeof(uint16_t);
|
||||
uint64_t counter_values_offset = num_samples * sizeof(uint64_t) +
|
||||
spm_trace->num_counters * sizeof(struct sqtt_spm_counter_info);
|
||||
|
||||
for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
|
||||
struct sqtt_spm_counter_info cntr_info = {
|
||||
.block = spm_trace->counters[c].gpu_block,
|
||||
.instance = spm_trace->counters[c].instance,
|
||||
.data_offset = counter_values_offset,
|
||||
.event_index = spm_trace->counters[c].event_id,
|
||||
};
|
||||
|
||||
file_offset += sizeof(cntr_info);
|
||||
fwrite(&cntr_info, sizeof(cntr_info), 1, output);
|
||||
|
||||
counter_values_offset += counter_values_size;
|
||||
}
|
||||
|
||||
/* SPM counter values. */
|
||||
uint32_t sample_size_in_hwords = sample_size_in_bytes / sizeof(uint16_t);
|
||||
uint16_t *counter_values_ptr = (uint16_t *)spm_data_ptr;
|
||||
|
||||
for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
|
||||
uint64_t offset = spm_trace->counters[c].offset;
|
||||
|
||||
for (uint32_t s = 0; s < num_samples; s++) {
|
||||
uint64_t index = offset + (s * sample_size_in_hwords);
|
||||
uint16_t value = counter_values_ptr[index];
|
||||
|
||||
file_offset += sizeof(value);
|
||||
fwrite(&value, sizeof(value), 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* SQTT SPM DB chunk. */
|
||||
ac_sqtt_fill_spm_db(spm_trace, &spm_db, num_samples,
|
||||
file_offset - file_spm_db_offset);
|
||||
fseek(output, file_spm_db_offset, SEEK_SET);
|
||||
fwrite(&spm_db, sizeof(struct sqtt_file_chunk_spm_db), 1, output);
|
||||
fseek(output, file_offset, SEEK_SET);
|
||||
}
|
||||
|
||||
static void ac_sqtt_dump_data(struct radeon_info *rad_info,
|
||||
struct ac_thread_trace *thread_trace,
|
||||
const struct ac_spm_trace_data *spm_trace,
|
||||
FILE *output)
|
||||
{
|
||||
struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
|
||||
@ -1071,10 +1182,15 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
|
||||
fwrite(se->data_ptr, size, 1, output);
|
||||
}
|
||||
}
|
||||
|
||||
if (spm_trace) {
|
||||
ac_sqtt_dump_spm(spm_trace, file_offset, output);
|
||||
}
|
||||
}
|
||||
|
||||
int ac_dump_rgp_capture(struct radeon_info *info,
|
||||
struct ac_thread_trace *thread_trace)
|
||||
struct ac_thread_trace *thread_trace,
|
||||
const struct ac_spm_trace_data *spm_trace)
|
||||
{
|
||||
char filename[2048];
|
||||
struct tm now;
|
||||
@ -1092,7 +1208,7 @@ int ac_dump_rgp_capture(struct radeon_info *info,
|
||||
if (!f)
|
||||
return -1;
|
||||
|
||||
ac_sqtt_dump_data(info, thread_trace, f);
|
||||
ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
|
||||
|
||||
fprintf(stderr, "RGP capture saved to '%s'\n", filename);
|
||||
|
||||
|
@ -34,6 +34,7 @@
|
||||
struct radeon_info;
|
||||
struct ac_thread_trace;
|
||||
struct ac_thread_trace_data;
|
||||
struct ac_spm_trace_data;
|
||||
|
||||
enum rgp_hardware_stages {
|
||||
RGP_HW_STAGE_VS = 0,
|
||||
@ -189,7 +190,8 @@ struct rgp_clock_calibration {
|
||||
|
||||
int
|
||||
ac_dump_rgp_capture(struct radeon_info *info,
|
||||
struct ac_thread_trace *thread_trace);
|
||||
struct ac_thread_trace *thread_trace,
|
||||
const struct ac_spm_trace_data *spm_trace);
|
||||
|
||||
void
|
||||
ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
|
||||
|
@ -363,7 +363,7 @@ radv_handle_thread_trace(VkQueue _queue)
|
||||
radv_QueueWaitIdle(_queue);
|
||||
|
||||
if (radv_get_thread_trace(queue, &thread_trace)) {
|
||||
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace);
|
||||
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, NULL);
|
||||
} else {
|
||||
/* Trigger a new capture if the driver failed to get
|
||||
* the trace because the buffer was too small.
|
||||
|
@ -710,7 +710,7 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
|
||||
/* Wait for SQTT to finish and read back the bo */
|
||||
if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
|
||||
si_get_thread_trace(sctx, &thread_trace)) {
|
||||
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace);
|
||||
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, NULL);
|
||||
} else {
|
||||
fprintf(stderr, "Failed to read the trace\n");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user