linux/tools/perf/util/evsel.h
Arnaldo Carvalho de Melo ee29be625b perf tools: Save some loops using perf_evlist__id2evsel
Since we already ask for PERF_SAMPLE_ID and use it to quickly find the
associated evsel, add handler func + data to struct perf_evsel to avoid
using chains of if(strcmp(event_name)) and also to avoid all the linear
list searches via trace_event_find.

To demonstrate the technique convert 'perf sched' to it:

 # perf sched record sleep 5m

And then:

 Performance counter stats for '/tmp/oldperf sched lat':

        646.929438 task-clock                #    0.999 CPUs utilized
                 9 context-switches          #    0.000 M/sec
                 0 CPU-migrations            #    0.000 M/sec
            20,901 page-faults               #    0.032 M/sec
     1,290,144,450 cycles                    #    1.994 GHz
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
     1,606,158,439 instructions              #    1.24  insns per cycle
       339,088,395 branches                  #  524.151 M/sec
         4,550,735 branch-misses             #    1.34% of all branches

       0.647524759 seconds time elapsed

Versus:

 Performance counter stats for 'perf sched lat':

        473.564691 task-clock                #    0.999 CPUs utilized
                 9 context-switches          #    0.000 M/sec
                 0 CPU-migrations            #    0.000 M/sec
            20,903 page-faults               #    0.044 M/sec
       944,367,984 cycles                    #    1.994 GHz
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
     1,442,385,571 instructions              #    1.53  insns per cycle
       308,383,106 branches                  #  651.195 M/sec
         4,481,784 branch-misses             #    1.45% of all branches

       0.474215751 seconds time elapsed

[root@emilia ~]#

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-1kbzpl74lwi6lavpqke2u2p3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-11-28 17:57:40 -02:00

173 lines
4.5 KiB
C

#ifndef __PERF_EVSEL_H
#define __PERF_EVSEL_H 1
#include <linux/list.h>
#include <stdbool.h>
#include "../../../include/linux/perf_event.h"
#include "types.h"
#include "xyarray.h"
#include "cgroup.h"
#include "hist.h"
struct perf_counts_values {
union {
struct {
u64 val;
u64 ena;
u64 run;
};
u64 values[3];
};
};
struct perf_counts {
s8 scaled;
struct perf_counts_values aggr;
struct perf_counts_values cpu[];
};
struct perf_evsel;
/*
* Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
* more than one entry in the evlist.
*/
struct perf_sample_id {
struct hlist_node node;
u64 id;
struct perf_evsel *evsel;
};
/** struct perf_evsel - event selector
*
* @name - Can be set to retain the original event name passed by the user,
* so that when showing results in tools such as 'perf stat', we
* show the name used, not some alias.
*/
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
struct xyarray *sample_id;
u64 *id;
struct perf_counts *counts;
int idx;
int ids;
struct hists hists;
char *name;
union {
void *priv;
off_t id_offset;
};
struct cgroup_sel *cgrp;
struct {
void *func;
void *data;
} handler;
bool supported;
};
struct cpu_map;
struct thread_map;
struct perf_evlist;
struct perf_record_opts;
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
void perf_evsel__init(struct perf_evsel *evsel,
struct perf_event_attr *attr, int idx);
void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
void perf_evsel__config(struct perf_evsel *evsel,
struct perf_record_opts *opts);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
void perf_evsel__free_id(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
struct cpu_map *cpus, bool group,
struct xyarray *group_fds);
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
struct thread_map *threads, bool group,
struct xyarray *group_fds);
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads, bool group,
struct xyarray *group_fds);
void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
#define perf_evsel__match(evsel, t, c) \
(evsel->attr.type == PERF_TYPE_##t && \
evsel->attr.config == PERF_COUNT_##c)
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread, bool scale);
/**
* perf_evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
int cpu, int thread)
{
return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
}
/**
* perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
int cpu, int thread)
{
return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
}
int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
bool scale);
/**
* perf_evsel__read - Read the aggregate results on all CPUs
*
* @evsel - event selector to read value
* @ncpus - Number of cpus affected, from zero
* @nthreads - Number of threads affected, from zero
*/
static inline int perf_evsel__read(struct perf_evsel *evsel,
int ncpus, int nthreads)
{
return __perf_evsel__read(evsel, ncpus, nthreads, false);
}
/**
* perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
*
* @evsel - event selector to read value
* @ncpus - Number of cpus affected, from zero
* @nthreads - Number of threads affected, from zero
*/
static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
int ncpus, int nthreads)
{
return __perf_evsel__read(evsel, ncpus, nthreads, true);
}
int __perf_evsel__sample_size(u64 sample_type);
static inline int perf_evsel__sample_size(struct perf_evsel *evsel)
{
return __perf_evsel__sample_size(evsel->attr.sample_type);
}
#endif /* __PERF_EVSEL_H */