initial profiler support for getting full costs for subroutine calls

includes:
- common code for collecting, processing and outputting
  the data about the full costs
- full CPU side support
- related structure changes in DSP code
- some extra refactoring to improve code CPU/DSP profiling code
- post-processor support for parsing the new data
  and storing it internally

missing:
- DSP side code (opcode classification) needed for
  getting the full costs
- post-processor actually using the new data
- testing the new data correctness
This commit is contained in:
Eero Tamminen 2013-03-18 22:02:22 +02:00
parent e5fd99c8c3
commit 57694ea262
5 changed files with 513 additions and 250 deletions

View File

@ -11,6 +11,7 @@
const char Profile_fileid[] = "Hatari profile.c : " __DATE__ " " __TIME__;
#include <stdio.h>
#include <assert.h>
#include "main.h"
#include "version.h"
#include "debugui.h"
@ -44,17 +45,60 @@ static const struct {
*/
static int cmp_callers(const void *c1, const void *c2)
{
Uint32 count1 = ((const caller_t*)c1)->count;
Uint32 count2 = ((const caller_t*)c2)->count;
if (count1 > count2) {
Uint32 calls1 = ((const caller_t*)c1)->calls;
Uint32 calls2 = ((const caller_t*)c2)->calls;
if (calls1 > calls2) {
return -1;
}
if (count1 < count2) {
if (calls1 < calls2) {
return 1;
}
return 0;
}
/**
* output caller counter information
*/
static bool output_counter_info(FILE *fp, counters_t *counter)
{
if (!counter->count) {
return false;
}
fprintf(fp, " %lld/%lld/%lld", counter->calls, counter->count, counter->cycles);
if (counter->misses) {
/* these are only with specific WinUAE CPU core */
fprintf(fp, "/%lld", counter->misses);
}
return true;
}
/**
* output caller call counts, call type(s) and costs
*/
static void output_caller_info(FILE *fp, caller_t *info, Uint32 *typeaddr)
{
int k, typecount;
fprintf(fp, "0x%x = %d", info->addr, info->calls);
if (info->flags) { /* calltypes supported? */
fputc(' ', fp);
typecount = 0;
for (k = 0; k < ARRAYSIZE(flaginfo); k++) {
if (info->flags & flaginfo[k].bit) {
fputc(flaginfo[k].chr, fp);
typecount++;
}
}
if (typecount > 1) {
*typeaddr = info->addr;
}
}
if (output_counter_info(fp, &(info->all))) {
output_counter_info(fp, &(info->own));
}
fputs(", ", fp);
}
/*
* Show collected CPU/DSP callee/caller information.
*
@ -63,19 +107,19 @@ static int cmp_callers(const void *c1, const void *c2)
*/
void Profile_ShowCallers(FILE *fp, int sites, callee_t *callsite, const char * (*addr2name)(Uint32, Uint64 *))
{
int typecount, countissues, countdiff;
int i, j, k;
int i, j, countissues, countdiff;
const char *name;
caller_t *info;
Uint64 total;
Uint32 addr, typeaddr;
/* legend */
fputs("# ", fp);
for (k = 0; k < ARRAYSIZE(flaginfo); k++) {
fprintf(fp, "%c = %s, ", flaginfo[k].chr, flaginfo[k].info);
fputs("# <callee>: <caller1> = <calls> <types>[ <inclusive/totals>[ <exclusive/totals>]], <caller2> ..., <callee name>", fp);
fputs("\n# types: ", fp);
for (i = 0; i < ARRAYSIZE(flaginfo); i++) {
fprintf(fp, "%c = %s, ", flaginfo[i].chr, flaginfo[i].info);
}
fputs("\n", fp);
fputs("- = info missing\n# totals: calls/instructions/cycles/misses\n", fp);
countdiff = 0;
countissues = 0;
@ -94,25 +138,11 @@ void Profile_ShowCallers(FILE *fp, int sites, callee_t *callsite, const char * (
if (!info->addr) {
break;
}
fprintf(fp, "0x%x = %d", info->addr, info->count);
total -= info->count;
if (info->flags) { /* calltypes supported? */
fputs(" ", fp);
typecount = 0;
for (k = 0; k < ARRAYSIZE(flaginfo); k++) {
if (info->flags & flaginfo[k].bit) {
fputc(flaginfo[k].chr, fp);
typecount++;
}
}
if (typecount > 1) {
typeaddr = info->addr;
}
}
fputs(", ", fp);
total -= info->calls;
output_caller_info(fp, info, &typeaddr);
}
if (name) {
fprintf(fp, "(%s)", name);
fprintf(fp, "%s", name);
}
fputs("\n", fp);
if (total) {
@ -138,14 +168,61 @@ void Profile_ShowCallers(FILE *fp, int sites, callee_t *callsite, const char * (
}
}
/**
* Update CPU/DSP callee / caller information, if called address contains
* symbol address (= function, or other interesting place in code)
* add second counter values to first counters
*/
void Profile_UpdateCaller(callee_t *callsite, Uint32 pc, Uint32 caller, calltype_t flag)
static void add_counter_costs(counters_t *dst, counters_t *src)
{
dst->calls += src->calls;
dst->count += src->count;
dst->cycles += src->cycles;
dst->misses += src->misses;
}
/**
* set first counter values to their difference from a reference value
*/
static void set_counter_diff(counters_t *dst, counters_t *ref)
{
dst->calls = ref->calls - dst->calls;
dst->count = ref->count - dst->count;
dst->cycles = ref->cycles - dst->cycles;
dst->misses = ref->misses - dst->misses;
}
/**
* add called (callee) function costs to caller information
*/
static void add_callee_cost(callee_t *callsite, callstack_t *stack)
{
caller_t *info = callsite->callers;
counters_t owncost;
int i;
for (i = 0; i < callsite->count; i++, info++) {
if (info->addr == stack->caller_addr) {
/* own cost for callee is its child (out) costs
* deducted from full (all) costs
*/
owncost = stack->out;
set_counter_diff(&owncost, &(stack->all));
add_counter_costs(&(info->own), &owncost);
add_counter_costs(&(info->all), &(stack->all));
return;
}
}
/* cost is only added for updated callers,
* so they should always exist
*/
assert(0);
}
static void add_caller(callee_t *callsite, Uint32 pc, Uint32 prev_pc, calltype_t flag)
{
int i, count;
caller_t *info;
int i, count;
/* need to store real call addresses as symbols can change
* after profiling has been stopped
@ -166,16 +243,16 @@ void Profile_UpdateCaller(callee_t *callsite, Uint32 pc, Uint32 caller, calltype
count = callsite->count;
for (;;) {
for (i = 0; i < count; i++, info++) {
if (info->addr == caller) {
if (info->addr == prev_pc) {
info->flags |= flag;
info->count++;
info->calls++;
return;
}
if (!info->addr) {
/* empty slot */
info->addr = caller;
info->addr = prev_pc;
info->flags |= flag;
info->count = 1;
info->calls = 1;
return;
}
}
@ -192,29 +269,151 @@ void Profile_UpdateCaller(callee_t *callsite, Uint32 pc, Uint32 caller, calltype
}
}
int Profile_AllocCallerInfo(const char *info, int oldcount, int count, callee_t **callsite)
static void subcall_start_check(int idx, callinfo_t *callinfo, Uint32 prev_pc, calltype_t flag, Uint32 pc, Uint32 ret_pc, counters_t *runcounts)
{
if (*callsite) {
/* free old data */
int i;
callee_t *site = *callsite;
for (i = 0; i < oldcount; i++, site++) {
callstack_t *stack;
int count;
if (flag != CALL_SUBROUTINE) {
return;
}
runcounts->calls++;
if (unlikely(!callinfo->count)) {
/* initial stack alloc, can be a bit larger */
count = 8;
stack = calloc(count, sizeof(*stack));
if (!stack) {
fputs("ERROR: callstack alloc failed!\n", stderr);
return;
}
callinfo->stack = stack;
callinfo->count = count;
} else if (unlikely(callinfo->depth+1 >= callinfo->count)) {
/* need to alloc more stack space for new call? */
count = callinfo->count * 2;
stack = realloc(callinfo->stack, count * sizeof(*stack));
if (!stack) {
fputs("ERROR: callstack alloc failed!\n", stderr);
return;
}
memset(stack + callinfo->count, 0, callinfo->count * sizeof(*stack));
callinfo->stack = stack;
callinfo->count = count;
}
/* called function */
stack = &(callinfo->stack[callinfo->depth++]);
/* store current running totals & zero subcall costs */
stack->all = *runcounts;
memset(&(stack->out), 0, sizeof(stack->out));
/* set subroutine call information */
assert(ret_pc);
stack->ret_addr = ret_pc;
stack->callee_idx = idx;
stack->caller_addr = prev_pc;
stack->callee_addr = pc;
}
static void subcall_end_check(callinfo_t *callinfo, Uint32 prev_pc, calltype_t flag, Uint32 pc, Uint32 ret_pc, counters_t *runcounts)
{
callstack_t *stack;
if (!callinfo->depth) {
/* no calls yet */
return;
}
stack = &(callinfo->stack[callinfo->depth-1]);
if (likely(pc != stack->ret_addr)) {
/* not return address */
return;
}
if (unlikely(flag == CALL_SUBROUTINE)) {
/* return address, but not return
* (EmuTOS perversity: JSR back from JSR)
*/
return;
}
/* remove call info from stack */
callinfo->depth--;
if (flag != CALL_SUBRETURN) {
/* wasn't a real subroutine call... */
if (flag == CALL_EXCRETURN) {
/* back from interrupt handler, ignore whole call */
return;
}
/* ...but a mystery to debug */
fprintf(stderr, "WARNING: subroutine call from 0x%x -> 0x%x returned 0x%x -> 0x%x!\n",
stack->caller_addr, stack->callee_addr, prev_pc, pc);
DebugUI(REASON_USER);
return;
}
/* full cost is orignal global cost (in ->all)
* deducted from current global (runcost) cost
*/
set_counter_diff(&(stack->all), runcounts);
add_callee_cost(callinfo->site + stack->callee_idx, stack);
/* add full cost of this to parent caller's outside costs */
if (callinfo->depth) {
callstack_t *pstack = stack - 1;
add_counter_costs(&(pstack->out), &(stack->all));
}
}
/**
* Update CPU/DSP callee / caller information, if called address contains
* symbol address (= function, or other interesting place in code)
*/
void Profile_UpdateCallinfo(int idx, callinfo_t *callinfo, Uint32 prev_pc, calltype_t flag, Uint32 pc, Uint32 ret_pc, counters_t *runcounts)
{
if (unlikely(idx >= 0 && idx < callinfo->sites)) {
subcall_start_check(idx, callinfo, prev_pc, flag, pc, ret_pc, runcounts);
add_caller(callinfo->site + idx, pc, prev_pc, flag);
} else {
subcall_end_check(callinfo, prev_pc, flag, pc, ret_pc, runcounts);
}
}
int Profile_AllocCallinfo(callinfo_t *callinfo, int count, const char *name)
{
callinfo->sites = count;
if (count) {
/* alloc & clear new data */
callinfo->site = calloc(count, sizeof(callee_t));
if (callinfo->site) {
printf("Allocated %s profile callsite buffer for %d symbols.\n", name, count);
} else {
fprintf(stderr, "ERROR: callesite buffer alloc failed!\n");
callinfo->sites = 0;
}
}
return callinfo->sites;
}
void Profile_FreeCallinfo(callinfo_t *callinfo)
{
int i;
if (callinfo->sites) {
callee_t *site = callinfo->site;
for (i = 0; i < callinfo->sites; i++, site++) {
if (site->callers) {
free(site->callers);
}
}
free(*callsite);
*callsite = NULL;
}
if (count) {
/* alloc & clear new data */
*callsite = calloc(count, sizeof(callee_t));
if (*callsite) {
printf("Allocated %s profile callsite buffer for %d symbols.\n", info, count);
return count;
free(callinfo->site);
if (callinfo->stack) {
free(callinfo->stack);
}
memset(callinfo, 0, sizeof(*callinfo));
}
return 0;
}

View File

@ -21,11 +21,26 @@ typedef enum {
CALL_INTERRUPT = 128
} calltype_t;
typedef struct {
Uint64 calls, count, cycles, misses;
} counters_t;
typedef struct {
int callee_idx; /* index of called function */
Uint32 ret_addr; /* address after returning from call */
Uint32 caller_addr; /* remove informational caller address */
Uint32 callee_addr; /* remove informational callee address */
counters_t all; /* totals including everything called code does */
counters_t out; /* totals for subcalls done from callee */
} callstack_t;
/* callee/caller information */
typedef struct {
calltype_t flags:8; /* what kind of call it was */
unsigned int addr:24; /* address for the caller */
Uint32 count; /* number of calls */
Uint32 calls; /* number of calls, exclusive */
counters_t all; /* totals including everything called code does */
counters_t own; /* totals excluding called code (=sum(all-out)) */
} caller_t;
typedef struct {
@ -34,19 +49,29 @@ typedef struct {
caller_t *callers; /* who called this address */
} callee_t;
typedef struct {
int sites; /* number of symbol callsites */
int depth; /* how many callstack calls haven't yet returned */
int count; /* number of items allocated for stack */
callee_t *site; /* symbol specific caller information */
callstack_t *stack; /* calls that will return */
} callinfo_t;
/* CPU/DSP memory area statistics */
typedef struct {
Uint64 all_cycles, all_count, all_misses;
Uint64 max_cycles; /* for overflow check (cycles > count or misses) */
counters_t counters; /* counters for this area */
Uint32 lowest, highest; /* active address range within memory area */
int active; /* number of active addresses */
bool overflow; /* whether counters overflowed */
} profile_area_t;
/* generic profile caller functions */
extern void Profile_ShowCallers(FILE *fp, int sites, callee_t *callsite, const char * (*addr2name)(Uint32, Uint64 *));
extern int Profile_AllocCallerInfo(const char *info, int oldcount, int count, callee_t **callsite);
extern void Profile_UpdateCaller(callee_t *callsite, Uint32 pc, Uint32 caller, calltype_t flag);
extern void Profile_UpdateCallinfo(int idx, callinfo_t *callinfo, Uint32 prev_pc, calltype_t flag, Uint32 pc, Uint32 ret_pc, counters_t *runcounts);
extern int Profile_AllocCallinfo(callinfo_t *callinfo, int count, const char *info);
extern void Profile_FreeCallinfo(callinfo_t *callinfo);
/* parser helpers */
extern void Profile_CpuGetPointers(bool **enabled, Uint32 **disasm_addr);

View File

@ -27,6 +27,8 @@ const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
/* if non-zero, output (more) warnings on syspicious cycle/instruction counts */
#define DEBUG 0
static callinfo_t cpu_callinfo;
/* This is relevant with WinUAE CPU core:
* - the default cycle exact variant needs this define to be non-zero
* - non-cycle exact and MMU variants need this define to be 0
@ -37,7 +39,7 @@ const char Profilecpu_fileid[] = "Hatari profilecpu.c : " __DATE__ " " __TIME__;
#define MAX_CPU_PROFILE_VALUE 0xFFFFFFFF
typedef struct {
Uint32 count; /* how many times this address is used */
Uint32 count; /* how many times this address instrcution is executed */
Uint32 cycles; /* how many CPU cycles was taken at this address */
Uint32 misses; /* how many CPU cache misses happened at this address */
} cpu_profile_item_t;
@ -45,16 +47,14 @@ typedef struct {
#define MAX_MISS 4
static struct {
Uint64 all_cycles, all_count, all_misses;
Uint32 miss_counts[MAX_MISS];
counters_t all; /* total counts for all areas */
Uint32 miss_counts[MAX_MISS]; /* cache miss counts */
cpu_profile_item_t *data; /* profile data items */
Uint32 size; /* number of allocated profile data items */
profile_area_t ram; /* normal RAM stats */
profile_area_t rom; /* cartridge ROM stats */
profile_area_t tos; /* ROM TOS stats */
int active; /* number of active data items in all areas */
int sites; /* number of symbol callsites */
callee_t *callsite; /* symbol specific caller information */
Uint32 *sort_arr; /* data indexes used for sorting */
Uint32 prev_cycles; /* previous instruction cycles counter */
Uint32 prev_pc; /* previous instruction address */
@ -129,8 +129,8 @@ bool Profile_CpuAddressData(Uint32 addr, float *percentage, Uint32 *count, Uint3
*misses = cpu_profile.data[idx].misses;
*cycles = cpu_profile.data[idx].cycles;
*count = cpu_profile.data[idx].count;
if (cpu_profile.all_count) {
*percentage = 100.0*(*count)/cpu_profile.all_count;
if (cpu_profile.all.count) {
*percentage = 100.0*(*count)/cpu_profile.all.count;
} else {
*percentage = 0.0;
}
@ -153,20 +153,20 @@ static void show_cpu_area_stats(profile_area_t *area)
area->active,
100.0 * area->active / cpu_profile.active);
fprintf(stderr, "- executed instructions:\n %llu (%.2f%% of all)\n",
area->all_count,
100.0 * area->all_count / cpu_profile.all_count);
area->counters.count,
100.0 * area->counters.count / cpu_profile.all.count);
#if ENABLE_WINUAE_CPU
if (cpu_profile.all_misses) { /* CPU cache in use? */
if (cpu_profile.all.misses) { /* CPU cache in use? */
fprintf(stderr, "- instruction cache misses:\n %llu (%.2f%% of all)\n",
area->all_misses,
100.0 * area->all_misses / cpu_profile.all_misses);
area->counters.misses,
100.0 * area->counters.misses / cpu_profile.all.misses);
}
#endif
fprintf(stderr, "- used cycles:\n %llu (%.2f%% of all)\n = %.5fs\n",
area->all_cycles,
100.0 * area->all_cycles / cpu_profile.all_cycles,
(double)area->all_cycles / MachineClocks.CPU_Freq);
if (area->max_cycles == MAX_CPU_PROFILE_VALUE) {
area->counters.cycles,
100.0 * area->counters.cycles / cpu_profile.all.cycles,
(double)area->counters.cycles / MachineClocks.CPU_Freq);
if (area->overflow) {
fprintf(stderr, " *** COUNTER OVERFLOW! ***\n");
}
}
@ -187,10 +187,10 @@ void Profile_CpuShowStats(void)
show_cpu_area_stats(&cpu_profile.rom);
fprintf(stderr, "\n= %.5fs\n",
(double)cpu_profile.all_cycles / MachineClocks.CPU_Freq);
(double)cpu_profile.all.cycles / MachineClocks.CPU_Freq);
#if ENABLE_WINUAE_CPU
if (cpu_profile.all_misses) { /* CPU cache in use? */
if (cpu_profile.all.misses) { /* CPU cache in use? */
int i;
fprintf(stderr, "\nCache misses per instruction, number of occurrences:\n");
for (i = 0; i < MAX_MISS; i++) {
@ -309,7 +309,7 @@ void Profile_CpuShowMisses(int show)
for (end = sort_arr + show; sort_arr < end; sort_arr++) {
addr = index2address(*sort_arr);
count = data[*sort_arr].misses;
percentage = 100.0*count/cpu_profile.all_misses;
percentage = 100.0*count/cpu_profile.all.misses;
printf("0x%06x\t%.2f%%\t%d%s\n", addr, percentage, count,
count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
}
@ -357,7 +357,7 @@ void Profile_CpuShowCycles(int show)
for (end = sort_arr + show; sort_arr < end; sort_arr++) {
addr = index2address(*sort_arr);
count = data[*sort_arr].cycles;
percentage = 100.0*count/cpu_profile.all_cycles;
percentage = 100.0*count/cpu_profile.all.cycles;
printf("0x%06x\t%.2f%%\t%d%s\n", addr, percentage, count,
count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
}
@ -410,7 +410,7 @@ void Profile_CpuShowCounts(int show, bool only_symbols)
for (end = sort_arr + show; sort_arr < end; sort_arr++) {
addr = index2address(*sort_arr);
count = data[*sort_arr].count;
percentage = 100.0*count/cpu_profile.all_count;
percentage = 100.0*count/cpu_profile.all.count;
printf("0x%06x\t%.2f%%\t%d%s\n",
addr, percentage, count,
count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
@ -435,7 +435,7 @@ void Profile_CpuShowCounts(int show, bool only_symbols)
continue;
}
count = data[*sort_arr].count;
percentage = 100.0*count/cpu_profile.all_count;
percentage = 100.0*count/cpu_profile.all.count;
printf("0x%06x\t%.2f%%\t%d\t%s%s\n",
addr, percentage, count, name,
count == MAX_CPU_PROFILE_VALUE ? " (OVERFLOW)" : "");
@ -461,7 +461,7 @@ static const char * addr2name(Uint32 addr, Uint64 *total)
*/
void Profile_CpuShowCallers(FILE *fp)
{
Profile_ShowCallers(fp, cpu_profile.sites, cpu_profile.callsite, addr2name);
Profile_ShowCallers(fp, cpu_callinfo.sites, cpu_callinfo.site, addr2name);
}
/**
@ -494,6 +494,9 @@ void Profile_CpuSave(FILE *out)
*/
bool Profile_CpuStart(void)
{
int size;
Profile_FreeCallinfo(&(cpu_callinfo));
if (cpu_profile.sort_arr) {
/* remove previous results */
free(cpu_profile.sort_arr);
@ -505,30 +508,36 @@ bool Profile_CpuStart(void)
if (!cpu_profile.enabled) {
return false;
}
/* zero everything */
memset(&cpu_profile, 0, sizeof(cpu_profile));
/* Shouldn't change within same debug session */
cpu_profile.size = (STRamEnd + 0x20000 + TosSize) / 2;
size = (STRamEnd + 0x20000 + TosSize) / 2;
/* Add one entry for catching invalid PC values */
cpu_profile.data = calloc(cpu_profile.size+1, sizeof(*cpu_profile.data));
if (cpu_profile.data) {
printf("Allocated CPU profile buffer (%d MB).\n",
(int)sizeof(*cpu_profile.data)*cpu_profile.size/(1024*1024));
} else {
cpu_profile.data = calloc(size + 1, sizeof(*cpu_profile.data));
if (!cpu_profile.data) {
perror("ERROR, new CPU profile buffer alloc failed");
cpu_profile.enabled = false;
return false;
}
cpu_profile.sites = Profile_AllocCallerInfo("CPU", cpu_profile.sites, Symbols_CpuCount(), &(cpu_profile.callsite));
printf("Allocated CPU profile buffer (%d MB).\n",
(int)sizeof(*cpu_profile.data)*size/(1024*1024));
cpu_profile.size = size;
Profile_AllocCallinfo(&(cpu_callinfo), Symbols_CpuCount(), "CPU");
memset(cpu_profile.miss_counts, 0, sizeof(cpu_profile.miss_counts));
cpu_profile.prev_cycles = Cycles_GetCounter(CYCLES_COUNTER_CPU);
cpu_profile.prev_pc = M68000_GetPC();
cpu_profile.disasm_addr = 0;
cpu_profile.processed = false;
cpu_profile.enabled = true;
return cpu_profile.enabled;
}
/* return branch type based on caller instruction type */
/**
* return caller instruction type classification
*/
static calltype_t cpu_opcode_type(Uint32 prev_pc, Uint32 pc)
{
switch (OpcodeFamily) {
@ -570,7 +579,6 @@ static calltype_t cpu_opcode_type(Uint32 prev_pc, Uint32 pc)
return CALL_UNKNOWN;
}
/**
* Update CPU cycle and count statistics for PC address.
*
@ -588,15 +596,6 @@ void Profile_CpuUpdate(void)
prev_pc = cpu_profile.prev_pc;
cpu_profile.prev_pc = pc = M68000_GetPC();
if (cpu_profile.sites) {
int idx = Symbols_GetCpuAddressIndex(pc);
if (unlikely(idx >= 0 && idx < cpu_profile.sites)) {
calltype_t flag = cpu_opcode_type(prev_pc, pc);
Profile_UpdateCaller(cpu_profile.callsite + idx,
pc, prev_pc, flag);
}
}
idx = address2index(prev_pc);
assert(idx <= cpu_profile.size);
prev = cpu_profile.data + idx;
@ -604,6 +603,7 @@ void Profile_CpuUpdate(void)
if (likely(prev->count < MAX_CPU_PROFILE_VALUE)) {
prev->count++;
}
cpu_profile.all.count++;
#if USE_CYCLES_COUNTER
/* Confusingly, with DSP enabled, cycle counter is for this instruction,
@ -627,6 +627,7 @@ void Profile_CpuUpdate(void)
} else {
prev->cycles = MAX_CPU_PROFILE_VALUE;
}
cpu_profile.all.cycles += cycles;
#if ENABLE_WINUAE_CPU
misses = CpuInstruction.iCacheMisses;
@ -637,6 +638,7 @@ void Profile_CpuUpdate(void)
} else {
prev->misses = MAX_CPU_PROFILE_VALUE;
}
cpu_profile.all.misses += misses;
#endif
#if DEBUG
@ -657,13 +659,27 @@ void Profile_CpuUpdate(void)
Disasm(stderr, prev_pc, &nextpc, 1);
}
#endif
if (cpu_callinfo.sites) {
int symidx;
Uint32 ret_pc;
calltype_t flag = cpu_opcode_type(prev_pc, pc);
if (flag == CALL_SUBROUTINE) {
/* slow, so needs to be checked only when needed */
ret_pc = Disasm_GetNextPC(prev_pc);
} else {
ret_pc = 0;
}
symidx = Symbols_GetCpuAddressIndex(pc);
Profile_UpdateCallinfo(symidx, &cpu_callinfo, prev_pc, flag, pc, ret_pc, &(cpu_profile.all));
}
}
/**
* Helper for collecting CPU profile area statistics.
* Helper for accounting CPU profile area item.
*/
static void update_cpu_area(Uint32 addr, cpu_profile_item_t *item, profile_area_t *area)
static void update_area_item(profile_area_t *area, Uint32 addr, cpu_profile_item_t *item)
{
Uint32 cycles = item->cycles;
Uint32 count = item->count;
@ -671,14 +687,13 @@ static void update_cpu_area(Uint32 addr, cpu_profile_item_t *item, profile_area_
if (!count) {
return;
}
area->all_count += count;
area->all_misses += item->misses;
area->all_cycles += cycles;
area->counters.count += count;
area->counters.misses += item->misses;
area->counters.cycles += cycles;
if (cycles > area->max_cycles) {
area->max_cycles = cycles;
if (cycles == MAX_CPU_PROFILE_VALUE) {
area->overflow = true;
}
if (addr < area->lowest) {
area->lowest = addr;
}
@ -687,6 +702,40 @@ static void update_cpu_area(Uint32 addr, cpu_profile_item_t *item, profile_area_
area->active++;
}
/**
* Helper for collecting CPU profile area statistics.
*/
static Uint32 update_area(profile_area_t *area, Uint32 start, Uint32 end)
{
cpu_profile_item_t *item;
Uint32 addr;
memset(area, 0, sizeof(profile_area_t));
area->lowest = cpu_profile.size;
item = &(cpu_profile.data[start]);
for (addr = start; addr < end; addr++, item++) {
update_area_item(area, addr, item);
}
return addr;
}
/**
* Helper for initializing CPU profile area sorting indexes.
*/
static Uint32* index_area(profile_area_t *area, Uint32 *sort_arr)
{
cpu_profile_item_t *item;
Uint32 addr;
item = &(cpu_profile.data[area->lowest]);
for (addr = area->lowest; addr <= area->highest; addr++, item++) {
if (item->count) {
*sort_arr++ = addr;
}
}
return sort_arr;
}
/**
* Stop and process the CPU profiling data; collect stats and
@ -694,11 +743,8 @@ static void update_cpu_area(Uint32 addr, cpu_profile_item_t *item, profile_area_
*/
void Profile_CpuStop(void)
{
cpu_profile_item_t *item;
profile_area_t *area;
Uint32 *sort_arr;
Uint32 *sort_arr, next;
int active;
Uint32 i;
if (cpu_profile.processed || !cpu_profile.enabled) {
return;
@ -706,39 +752,15 @@ void Profile_CpuStop(void)
/* user didn't change RAM or TOS size in the meanwhile? */
assert(cpu_profile.size == (STRamEnd + 0x20000 + TosSize) / 2);
/* find lowest and highest addresses executed... */
item = cpu_profile.data;
/* find lowest and highest addresses executed etc */
next = update_area(&cpu_profile.ram, 0, STRamEnd/2);
next = update_area(&cpu_profile.tos, next, (STRamEnd + TosSize)/2);
next = update_area(&cpu_profile.ram, next, cpu_profile.size);
assert(next == cpu_profile.size);
/* ...for normal RAM */
area = &cpu_profile.ram;
memset(area, 0, sizeof(profile_area_t));
area->lowest = cpu_profile.size;
for (i = 0; i < STRamEnd/2; i++, item++) {
update_cpu_area(i, item, area);
}
/* ...for ROM TOS */
area = &cpu_profile.tos;
memset(area, 0, sizeof(profile_area_t));
area->lowest = cpu_profile.size;
for (; i < (STRamEnd + TosSize)/2; i++, item++) {
update_cpu_area(i, item, area);
}
/* ... for Cartridge ROM */
area = &cpu_profile.rom;
memset(area, 0, sizeof(profile_area_t));
area->lowest = cpu_profile.size;
for (; i < cpu_profile.size; i++, item++) {
update_cpu_area(i, item, area);
}
cpu_profile.all_misses = cpu_profile.ram.all_misses + cpu_profile.rom.all_misses + cpu_profile.tos.all_misses;
cpu_profile.all_cycles = cpu_profile.ram.all_cycles + cpu_profile.rom.all_cycles + cpu_profile.tos.all_cycles;
cpu_profile.all_count = cpu_profile.ram.all_count + cpu_profile.rom.all_count + cpu_profile.tos.all_count;
assert(cpu_profile.all.misses == cpu_profile.ram.counters.misses + cpu_profile.rom.counters.misses + cpu_profile.tos.counters.misses);
assert(cpu_profile.all.cycles == cpu_profile.ram.counters.cycles + cpu_profile.rom.counters.cycles + cpu_profile.tos.counters.cycles);
assert(cpu_profile.all.count = cpu_profile.ram.counters.count + cpu_profile.rom.counters.count + cpu_profile.tos.counters.count);
/* allocate address array for sorting */
active = cpu_profile.ram.active + cpu_profile.rom.active + cpu_profile.tos.active;
@ -756,33 +778,10 @@ void Profile_CpuStop(void)
cpu_profile.active = active;
/* and fill addresses for used instructions... */
/* ...for normal RAM */
area = &cpu_profile.ram;
item = cpu_profile.data + area->lowest;
for (i = area->lowest; i <= area->highest; i++, item++) {
if (item->count) {
*sort_arr++ = i;
}
}
/* ...for TOS ROM */
area = &cpu_profile.tos;
item = cpu_profile.data + area->lowest;
for (i = area->lowest; i <= area->highest; i++, item++) {
if (item->count) {
*sort_arr++ = i;
}
}
/* ...for Cartridge ROM */
area = &cpu_profile.rom;
item = cpu_profile.data + area->lowest;
for (i = area->lowest; i <= area->highest; i++, item++) {
if (item->count) {
*sort_arr++ = i;
}
}
sort_arr = index_area(&cpu_profile.ram, sort_arr);
sort_arr = index_area(&cpu_profile.tos, sort_arr);
sort_arr = index_area(&cpu_profile.rom, sort_arr);
assert(sort_arr == cpu_profile.sort_arr + cpu_profile.active);
//printf("%d/%d/%d\n", area->active, sort_arr-cpu_profile.sort_arr, active);
Profile_CpuShowStats();

View File

@ -19,6 +19,8 @@ const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
#include "profile_priv.h"
#include "symbols.h"
static callinfo_t dsp_callinfo;
#define DSP_PROFILE_ARR_SIZE 0x10000
#define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFLL
@ -31,9 +33,7 @@ typedef struct {
static struct {
dsp_profile_item_t *data; /* profile data */
profile_area_t ram; /* normal RAM stats */
int sites; /* number of symbol callsites */
callee_t *callsite; /* symbol specific caller information */
profile_area_t ram; /* statistics for whole memory */
Uint16 *sort_arr; /* data indexes used for sorting */
Uint16 prev_pc; /* previous PC for which the cycles are for */
Uint32 disasm_addr; /* 'dspaddresses' command start address */
@ -63,8 +63,8 @@ bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint6
} else {
*cycle_diff = 0;
}
if (dsp_profile.ram.all_count) {
*percentage = 100.0*(*count)/dsp_profile.ram.all_count;
if (dsp_profile.ram.counters.count) {
*percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
} else {
*percentage = 0.0;
}
@ -87,20 +87,20 @@ void Profile_DspShowStats(void)
fprintf(stderr, "- active instruction addresses:\n %d\n",
area->active);
fprintf(stderr, "- executed instructions:\n %llu\n",
area->all_count);
area->counters.count);
/* indicates either instruction(s) that address different memory areas
* (they can have different access costs), or more significantly,
* DSP code that has changed during profiling.
*/
fprintf(stderr, "- sum of per instruction cycle changes\n (can indicate code change during profiling):\n %llu\n",
area->all_misses);
area->counters.misses);
fprintf(stderr, "- used cycles:\n %llu\n",
area->all_cycles);
if (area->max_cycles == MAX_DSP_PROFILE_VALUE) {
area->counters.cycles);
if (area->overflow) {
fprintf(stderr, " *** COUNTERS OVERFLOW! ***\n");
}
fprintf(stderr, "\n= %.5fs\n", (double)(area->all_cycles) / MachineClocks.DSP_Freq);
fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
}
/**
@ -199,7 +199,7 @@ void Profile_DspShowCycles(int show)
for (end = sort_arr + show; sort_arr < end; sort_arr++) {
addr = *sort_arr;
count = data[addr].cycles;
percentage = 100.0*count/dsp_profile.ram.all_cycles;
percentage = 100.0*count/dsp_profile.ram.counters.cycles;
printf("0x%04x\t%.2f%%\t%lld%s\n", addr, percentage, count,
count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
}
@ -253,7 +253,7 @@ void Profile_DspShowCounts(int show, bool only_symbols)
for (end = sort_arr + show; sort_arr < end; sort_arr++) {
addr = *sort_arr;
count = data[addr].count;
percentage = 100.0*count/dsp_profile.ram.all_count;
percentage = 100.0*count/dsp_profile.ram.counters.count;
printf("0x%04x\t%.2f%%\t%lld%s\n",
addr, percentage, count,
count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
@ -278,7 +278,7 @@ void Profile_DspShowCounts(int show, bool only_symbols)
continue;
}
count = data[addr].count;
percentage = 100.0*count/dsp_profile.ram.all_count;
percentage = 100.0*count/dsp_profile.ram.counters.count;
printf("0x%04x\t%.2f%%\t%lld\t%s%s\n",
addr, percentage, count, name,
count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
@ -303,7 +303,7 @@ static const char * addr2name(Uint32 addr, Uint64 *total)
*/
void Profile_DspShowCallers(FILE *fp)
{
Profile_ShowCallers(fp, dsp_profile.sites, dsp_profile.callsite, addr2name);
Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
}
/**
@ -334,6 +334,7 @@ bool Profile_DspStart(void)
dsp_profile_item_t *item;
int i;
Profile_FreeCallinfo(&(dsp_callinfo));
if (dsp_profile.sort_arr) {
/* remove previous results */
free(dsp_profile.sort_arr);
@ -345,33 +346,34 @@ bool Profile_DspStart(void)
if (!dsp_profile.enabled) {
return false;
}
/* zero everything */
memset(&dsp_profile, 0, sizeof(dsp_profile));
dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
if (dsp_profile.data) {
printf("Allocated DSP profile buffer (%d KB).\n",
(int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
} else {
if (!dsp_profile.data) {
perror("ERROR, new DSP profile buffer alloc failed");
dsp_profile.enabled = false;
return false;
}
printf("Allocated DSP profile buffer (%d KB).\n",
(int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCount(), "DSP");
item = dsp_profile.data;
for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
item->min_cycle = 0xFFFF;
}
dsp_profile.sites = Profile_AllocCallerInfo("DSP", dsp_profile.sites, Symbols_DspCount(), &(dsp_profile.callsite));
dsp_profile.prev_pc = DSP_GetPC();
dsp_profile.disasm_addr = 0;
dsp_profile.processed = false;
dsp_profile.enabled = true;
return dsp_profile.enabled;
}
/* return branch type based on caller instruction type */
static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
{
/* not supported (yet) */
return CALL_UNDEFINED;
}
@ -388,18 +390,12 @@ void Profile_DspUpdate(void)
prev_pc = dsp_profile.prev_pc;
dsp_profile.prev_pc = pc = DSP_GetPC();
if (dsp_profile.sites) {
int idx = Symbols_GetDspAddressIndex(pc);
if (unlikely(idx >= 0 && idx < dsp_profile.sites)) {
calltype_t flag = dsp_opcode_type(prev_pc, pc);
Profile_UpdateCaller(dsp_profile.callsite + idx,
pc, prev_pc, flag);
}
}
prev = dsp_profile.data + prev_pc;
if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
prev->count++;
}
dsp_profile.ram.counters.count++;
cycles = DSP_GetInstrCycles();
if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
@ -407,18 +403,34 @@ void Profile_DspUpdate(void)
} else {
prev->cycles = MAX_DSP_PROFILE_VALUE;
}
dsp_profile.ram.counters.cycles += cycles;
if (unlikely(cycles < prev->min_cycle)) {
prev->min_cycle = cycles;
}
if (unlikely(cycles > prev->max_cycle)) {
prev->max_cycle = cycles;
}
if (dsp_callinfo.sites) {
int symidx;
Uint16 ret_pc;
calltype_t flag = dsp_opcode_type(prev_pc, pc);
if (flag == CALL_SUBROUTINE) {
/* slow, so needs to be checked only when needed */
ret_pc = DSP_GetNextPC(prev_pc);
} else {
ret_pc = 0;
}
symidx = Symbols_GetDspAddressIndex(pc);
Profile_UpdateCallinfo(symidx, &dsp_callinfo, prev_pc, flag, pc, ret_pc, &(dsp_profile.ram.counters));
}
}
/**
* Helper for collecting DSP profile area statistics.
*/
static void update_dsp_area(Uint16 addr, dsp_profile_item_t *item, profile_area_t *area)
static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
{
Uint64 cycles = item->cycles;
Uint64 count = item->count;
@ -427,17 +439,15 @@ static void update_dsp_area(Uint16 addr, dsp_profile_item_t *item, profile_area_
if (!count) {
return;
}
area->all_count += count;
area->all_cycles += cycles;
if (cycles > area->max_cycles) {
area->max_cycles = cycles;
if (cycles == MAX_DSP_PROFILE_VALUE) {
area->overflow = true;
}
if (item->max_cycle) {
diff = item->max_cycle - item->min_cycle;
} else {
diff = 0;
}
area->all_misses += diff;
area->counters.misses += diff;
if (addr < area->lowest) {
area->lowest = addr;
@ -456,19 +466,19 @@ void Profile_DspStop(void)
dsp_profile_item_t *item;
profile_area_t *area;
Uint16 *sort_arr;
Uint32 i;
Uint32 addr;
if (dsp_profile.processed || !dsp_profile.enabled) {
return;
}
/* find lowest and highest addresses executed */
item = dsp_profile.data;
area = &dsp_profile.ram;
item = dsp_profile.data;
memset(area, 0, sizeof(profile_area_t));
area->lowest = DSP_PROFILE_ARR_SIZE;
for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
update_dsp_area(i, item, area);
for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
update_area_item(area, addr, item);
}
/* allocate address array for sorting */
@ -486,10 +496,10 @@ void Profile_DspStop(void)
/* ...and fill addresses for used instructions... */
area = &dsp_profile.ram;
item = dsp_profile.data + area->lowest;
for (i = area->lowest; i <= area->highest; i++, item++) {
item = &(dsp_profile.data[area->lowest]);
for (addr = area->lowest; addr <= area->highest; addr++, item++) {
if (item->count) {
*sort_arr++ = i;
*sort_arr++ = addr;
}
}
//printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);

View File

@ -528,6 +528,46 @@ class ProfileSymbols(Output):
return self.get_area(addr)
# ---------------------------------------------------------------------
class Callinfo:
"class representing parsed profile data caller/callee linkage information"
def __init__(self, addr, calls, flags, intotal, extotal):
self.addr = addr
# how many times this caller called the destination
self.calls = int(calls, 10)
# what kind of "call" it was?
if flags:
self.flags = flags.strip()
else:
self.flags = "-"
self.total = [None, None]
# cost of these calls including further function calls
self._parse_totals(0, intotal)
# costs exclusing further function calls
self._parse_totals(1, extotal)
def _parse_totals(self, idx, totalstr):
"parse '/' separate totals string into integer sequence, or None"
if totalstr:
self.total[idx] = [int(x, 10) for x in totalstr.strip().split('/')]
def _add_total(self, idx, newtotal):
"add given totals to own totals"
mytotal = self.total[idx]
if mytotal and newtotal:
self.total[idx] = [x + y for x, y in zip(mytotal, newtotal)]
if newtotal:
self.total[idx] = newtotal
def add(self, newinfo):
"add counts and totals from other item to this one"
self.calls += newinfo.calls
for i in range(len(self.total)):
self._add_total(i, newinfo.total[i])
# ---------------------------------------------------------------------
class ProfileCallers(Output):
"profile data callee/caller information parser & handler"
@ -535,9 +575,8 @@ class ProfileCallers(Output):
def __init__(self):
Output.__init__(self)
# caller info in callee line:
# 0x<hex>: 0x<hex> = <count>, N*[0x<hex> = <count>,][ (<symbol>)
# 0x<hex> = <count>
self.r_caller = re.compile("^0x([0-9a-f]+) = ([0-9]+)( [a-z]+)?$")
# 0x<hex> = <count> <typeletter>[ inclusive/totals][ exclusive/totals]
self.r_caller = re.compile("^0x([0-9a-f]+) = ([0-9]+)( [a-z]+)?( [0-9/]+)?( [0-9/]+)?$")
# whether there is any caller info
self.present = False
# address dicts
@ -567,7 +606,7 @@ class ProfileCallers(Output):
def parse_callers(self, fobj, parsed, line):
"parse callee: caller call count information"
#0x<hex>: 0x<hex> = <count>, N*[0x<hex> = <count>,][ (<symbol>)
#0x<hex>: 0x<hex> = <count>[ <flags>][, <inclusive/totals>[, <exclusive/totals>]]; N*[0x<hex> = <count>...;][ <symbol>]
self.callinfo = {}
while True:
parsed += 1
@ -583,24 +622,19 @@ class ProfileCallers(Output):
self.error_exit("caller info missing on callee line %d\n\t'%s'" % (parsed, line))
if ':' not in callers[0]:
self.error_exit("callee/caller separator ':' missing on callee line %d\n\t'%s'" % (parsed, line))
addr, callers[0] = callers[0].split(':')
last = callers[-1].strip()
if len(last) and last[-1] != ')':
if last.startswith('0x'):
self.error_exit("last item isn't empty or symbol name on callee line %d\n\t'%s'" % (parsed, line))
addr, callers[0] = callers[0].split(':')
callinfo = {}
for caller in callers[:-1]:
caller = caller.strip()
match = self.r_caller.match(caller)
if match:
caddr, count, flags = match.groups()
caddr = int(caddr, 16)
count = int(count, 10)
if flags:
flags = flags.strip()
else:
flags = '-'
callinfo[caddr] = (count, flags)
items = match.groups()
caddr = int(items[0], 16)
callinfo[caddr] = Callinfo(caddr, *items[1:])
else:
self.error_exit("unrecognized caller info '%s' on callee line %d\n\t'%s'" % (caller, parsed, line))
self.callinfo[int(addr, 16)] = callinfo
@ -621,14 +655,12 @@ class ProfileCallers(Output):
continue
# ...and their callers
ignore = total = 0
for item in caller.items():
laddr, info = item
count, flags = info
for laddr, info in caller.items():
pname, offset = symbols.get_preceeding_symbol(laddr)
if len(flags) > 1:
self.warning("caller instruction change ('%s') detected for '%s', did its code change during profiling?" % (flags, pname))
elif flags in self.removable_calltypes:
ignore += count
if len(info.flags) > 1:
self.warning("caller instruction change ('%s') detected for '%s', did its code change during profiling?" % (info.flags, pname))
elif info.flags in self.removable_calltypes:
ignore += info.calls
continue
# function address for the caller
paddr = laddr - offset
@ -637,17 +669,15 @@ class ProfileCallers(Output):
self.warning("overriding parsed function 0x%x name '%s' with resolved caller 0x%x name '%s'" % (parent.addr, parent.name, paddr, pname))
parent.name = pname
# link parent and child function together
item = (laddr, count, flags)
if paddr in child.parent:
if self.compact:
oldcount = child.parent[paddr][0][1]
child.parent[paddr] = ((paddr, oldcount + count, flags),)
child.parent[paddr][0].add(info)
else:
child.parent[paddr] += (item,)
child.parent[paddr] += (info,)
else:
child.parent[paddr] = (item,)
child.parent[paddr] = (info,)
parent.child[caddr] = True
total += count
total += info.calls
# validate call count
if ignore:
all_ignored += ignore
@ -687,7 +717,7 @@ class ProfileCallers(Output):
total = 0
# add together calls from this particular parent
for item in info:
total += item[1] # count
total += item.calls
if parents == 1 and total != calls:
self.error_exit("%s -> %s, single parent, but callcounts don't match: %d != %d" % (parent.name, child.name, total, calls))
# parent's share of current propagated child costs
@ -784,7 +814,7 @@ class ProfileCallgrind(Output):
info = child.parent[paddr]
total = 0
for item in info:
total += item[1] # count
total += item.calls
self.write("cfn=%s\n" % child.name)
self.write("calls=%d %d\n" % (total, child.line))
if child.total:
@ -1438,8 +1468,8 @@ label="%s";
# total calls count for child
calls = profile[caddr].data[0]
# calls to child done from different locations in parent
for laddr, count, flags in info:
self.edges[(laddr, caddr)] = (paddr, count, calls, flags)
for edge in info:
self.edges[(edge.addr, caddr)] = (paddr, calls, edge)
return (len(profile) - len(self.nodes))
def _output_nodes(self, stats, field, limit):
@ -1487,7 +1517,7 @@ label="%s";
"output graph edges from filtered edges dict, after nodes is called"
for linkage, info in self.edges.items():
laddr, caddr = linkage
paddr, count, calls, flags = info
paddr, calls, edge = info
pname = self.profile[paddr].name
offset = laddr - paddr
style = ""
@ -1496,6 +1526,7 @@ label="%s";
# arrowhead/tail styles:
# none, normal, inv, dot, odot, invdot, invodot, tee, empty,
# invempty, open, halfopen, diamond, odiamond, box, obox, crow
flags = edge.flags
if flags == CALL_NEXT or flags == CALL_BRANCH:
style += " arrowhead=dot"
elif flags == CALL_SUBROUTINE:
@ -1512,9 +1543,9 @@ label="%s";
label = "%s+%d\\n($%x)" % (pname, offset, laddr)
else:
label = pname
if count != calls:
percentage = 100.0 * count / calls
label = "%s\\n%d calls\\n=%.2f%%" % (label, count, percentage)
if edge.calls != calls:
percentage = 100.0 * edge.calls / calls
label = "%s\\n%d calls\\n=%.2f%%" % (label, edge.calls, percentage)
self.write("N_%X -> N_%X [label=\"%s\"%s];\n" % (paddr, caddr, label, style))
def do_output(self, profobj, fname):
@ -1522,7 +1553,6 @@ label="%s";
if not (self.output_enabled and profobj.callers.present):
return
stats = profobj.stats
basename = os.path.splitext(fname)[0]
for field in range(profobj.stats.items):
if not stats.totals[field]:
continue