ac/rgp: add support for dumping SPM data

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13704>
This commit is contained in:
Samuel Pitoiset 2021-05-31 16:42:36 +02:00 committed by Marge Bot
parent e928f475cc
commit 3e7bac80ce
4 changed files with 123 additions and 5 deletions

View File

@ -29,6 +29,7 @@
#include "util/u_process.h"
#include "util/u_math.h"
#include "ac_spm.h"
#include "ac_sqtt.h"
#include "ac_gpu_info.h"
#ifdef _WIN32
@ -892,8 +893,118 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi
}
}
/**
* SQTT SPM DB info.
*/
struct sqtt_spm_counter_info {
enum ac_pc_gpu_block block;
uint32_t instance;
uint32_t data_offset; /* offset of counter from the beginning of the chunk */
uint32_t event_index; /* index of counter within the block */
};
struct sqtt_file_chunk_spm_db {
struct sqtt_file_chunk_header header;
uint32_t flags;
uint32_t num_timestamps;
uint32_t num_spm_counter_info;
uint32_t sample_interval;
};
static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
"sqtt_file_chunk_spm_db doesn't match RGP spec");
static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
struct sqtt_file_chunk_spm_db *chunk,
uint32_t num_samples,
uint32_t chunk_size)
{
chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
chunk->header.chunk_id.index = 0;
chunk->header.major_version = 1;
chunk->header.minor_version = 3;
chunk->header.size_in_bytes = chunk_size;
chunk->flags = 0;
chunk->num_timestamps = num_samples;
chunk->num_spm_counter_info = spm_trace->num_counters;
chunk->sample_interval = spm_trace->sample_interval;
}
static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
size_t file_offset,
FILE *output)
{
uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
struct sqtt_file_chunk_spm_db spm_db;
size_t file_spm_db_offset = file_offset;
fseek(output, sizeof(struct sqtt_file_chunk_spm_db), SEEK_CUR);
file_offset += sizeof(struct sqtt_file_chunk_spm_db);
/* Skip the reserved 32 bytes of data at beginning. */
spm_data_ptr += 32;
/* SPM timestamps. */
uint32_t sample_size_in_qwords = sample_size_in_bytes / sizeof(uint64_t);
uint64_t *timestamp_ptr = (uint64_t *)spm_data_ptr;
for (uint32_t s = 0; s < num_samples; s++) {
uint64_t index = s * sample_size_in_qwords;
uint64_t timestamp = timestamp_ptr[index];
file_offset += sizeof(timestamp);
fwrite(&timestamp, sizeof(timestamp), 1, output);
}
/* SPM counter info. */
uint64_t counter_values_size = num_samples * sizeof(uint16_t);
uint64_t counter_values_offset = num_samples * sizeof(uint64_t) +
spm_trace->num_counters * sizeof(struct sqtt_spm_counter_info);
for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
struct sqtt_spm_counter_info cntr_info = {
.block = spm_trace->counters[c].gpu_block,
.instance = spm_trace->counters[c].instance,
.data_offset = counter_values_offset,
.event_index = spm_trace->counters[c].event_id,
};
file_offset += sizeof(cntr_info);
fwrite(&cntr_info, sizeof(cntr_info), 1, output);
counter_values_offset += counter_values_size;
}
/* SPM counter values. */
uint32_t sample_size_in_hwords = sample_size_in_bytes / sizeof(uint16_t);
uint16_t *counter_values_ptr = (uint16_t *)spm_data_ptr;
for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
uint64_t offset = spm_trace->counters[c].offset;
for (uint32_t s = 0; s < num_samples; s++) {
uint64_t index = offset + (s * sample_size_in_hwords);
uint16_t value = counter_values_ptr[index];
file_offset += sizeof(value);
fwrite(&value, sizeof(value), 1, output);
}
}
/* SQTT SPM DB chunk. */
ac_sqtt_fill_spm_db(spm_trace, &spm_db, num_samples,
file_offset - file_spm_db_offset);
fseek(output, file_spm_db_offset, SEEK_SET);
fwrite(&spm_db, sizeof(struct sqtt_file_chunk_spm_db), 1, output);
fseek(output, file_offset, SEEK_SET);
}
static void ac_sqtt_dump_data(struct radeon_info *rad_info,
struct ac_thread_trace *thread_trace,
const struct ac_spm_trace_data *spm_trace,
FILE *output)
{
struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
@ -1071,10 +1182,15 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
fwrite(se->data_ptr, size, 1, output);
}
}
if (spm_trace) {
ac_sqtt_dump_spm(spm_trace, file_offset, output);
}
}
int ac_dump_rgp_capture(struct radeon_info *info,
struct ac_thread_trace *thread_trace)
struct ac_thread_trace *thread_trace,
const struct ac_spm_trace_data *spm_trace)
{
char filename[2048];
struct tm now;
@ -1092,7 +1208,7 @@ int ac_dump_rgp_capture(struct radeon_info *info,
if (!f)
return -1;
ac_sqtt_dump_data(info, thread_trace, f);
ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
fprintf(stderr, "RGP capture saved to '%s'\n", filename);

View File

@ -34,6 +34,7 @@
struct radeon_info;
struct ac_thread_trace;
struct ac_thread_trace_data;
struct ac_spm_trace_data;
enum rgp_hardware_stages {
RGP_HW_STAGE_VS = 0,
@ -189,7 +190,8 @@ struct rgp_clock_calibration {
int
ac_dump_rgp_capture(struct radeon_info *info,
struct ac_thread_trace *thread_trace);
struct ac_thread_trace *thread_trace,
const struct ac_spm_trace_data *spm_trace);
void
ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,

View File

@ -363,7 +363,7 @@ radv_handle_thread_trace(VkQueue _queue)
radv_QueueWaitIdle(_queue);
if (radv_get_thread_trace(queue, &thread_trace)) {
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace);
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, NULL);
} else {
/* Trigger a new capture if the driver failed to get
* the trace because the buffer was too small.

View File

@ -710,7 +710,7 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
/* Wait for SQTT to finish and read back the bo */
if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
si_get_thread_trace(sctx, &thread_trace)) {
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace);
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, NULL);
} else {
fprintf(stderr, "Failed to read the trace\n");
}