mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-14 13:39:10 +00:00
aece948f5d
The PERF_EVENT_IOC_SET_OUTPUT ioctl was returning -EINVAL when using --pid when monitoring multithreaded apps, as we can only share a ring buffer for events on the same thread if not doing per cpu. Fix it by using per thread ring buffers. Tested with: [root@felicio ~]# tuna -t 26131 -CP | nl 1 thread ctxt_switches 2 pid SCHED_ rtpri affinity voluntary nonvoluntary cmd 3 26131 OTHER 0 0,1 10814276 2397830 chromium-browse 4 642 OTHER 0 0,1 14688 0 chromium-browse 5 26148 OTHER 0 0,1 713602 115479 chromium-browse 6 26149 OTHER 0 0,1 801958 2262 chromium-browse 7 26150 OTHER 0 0,1 1271128 248 chromium-browse 8 26151 OTHER 0 0,1 3 0 chromium-browse 9 27049 OTHER 0 0,1 36796 9 chromium-browse 10 618 OTHER 0 0,1 14711 0 chromium-browse 11 661 OTHER 0 0,1 14593 0 chromium-browse 12 29048 OTHER 0 0,1 28125 0 chromium-browse 13 26143 OTHER 0 0,1 2202789 781 chromium-browse [root@felicio ~]# So 11 threads under pid 26131, then: [root@felicio ~]# perf record -F 50000 --pid 26131 [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fa4a2538000-7fa4a25b9000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fa4a25b9000-7fa4a263a000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 3 7fa4a263a000-7fa4a26bb000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 4 7fa4a26bb000-7fa4a273c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 5 7fa4a273c000-7fa4a27bd000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 6 7fa4a27bd000-7fa4a283e000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 7 7fa4a283e000-7fa4a28bf000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 8 7fa4a28bf000-7fa4a2940000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 9 7fa4a2940000-7fa4a29c1000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 10 7fa4a29c1000-7fa4a2a42000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 11 7fa4a2a42000-7fa4a2ac3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# 11 mmaps, one per thread since we didn't specify any CPU list, so we need one mmap per thread and: [root@felicio ~]# perf record -F 50000 --pid 26131 ^M ^C[ perf record: Woken up 79 times to write data ] [ perf record: Captured and wrote 20.614 MB perf.data (~900639 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 371310 26131 2 96516 26148 3 95694 26149 4 95203 26150 5 7291 26143 6 87 27049 7 76 661 8 60 29048 9 47 618 10 43 642 [root@felicio ~]# Ok, one of the threads, 26151 was quiescent, so no samples there, but all the others are there. Then, if I specify one CPU: [root@felicio ~]# perf record -F 50000 --pid 26131 --cpu 1 ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.680 MB perf.data (~29730 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 8444 26131 2 2584 26149 3 2518 26148 4 2324 26150 5 123 26143 6 9 661 7 9 29048 [root@felicio ~]# This machine has two cores, so fewer threads appeared on the radar, and: [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f484b922000-7f484b9a3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Just one mmap, as now we can use just one per-cpu buffer instead of the per-thread needed in the previous case. For global profiling: [root@felicio ~]# perf record -F 50000 -a ^C[ perf record: Woken up 26 times to write data ] [ perf record: Captured and wrote 7.128 MB perf.data (~311412 samples) ] [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fb49b435000-7fb49b4b6000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fb49b4b6000-7fb49b537000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# It uses per-cpu buffers. For just one thread: [root@felicio ~]# perf record -F 50000 --tid 26148 ^C[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.330 MB perf.data (~14426 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 9969 26148 [root@felicio ~]# [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f286a51b000-7f286a59c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Tested-by: David Ahern <dsahern@gmail.com> Tested-by: Lin Ming <ming.m.lin@intel.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Tom Zanussi <tzanussi@gmail.com> Link: http://lkml.kernel.org/r/20110426204401.GB1746@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
70 lines
2.1 KiB
C
70 lines
2.1 KiB
C
#ifndef __PERF_EVLIST_H
|
|
#define __PERF_EVLIST_H 1
|
|
|
|
#include <linux/list.h>
|
|
#include "../perf.h"
|
|
#include "event.h"
|
|
|
|
struct pollfd;
|
|
struct thread_map;
|
|
struct cpu_map;
|
|
|
|
#define PERF_EVLIST__HLIST_BITS 8
|
|
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
|
|
|
|
struct perf_evlist {
|
|
struct list_head entries;
|
|
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
|
|
int nr_entries;
|
|
int nr_fds;
|
|
int nr_mmaps;
|
|
int mmap_len;
|
|
bool overwrite;
|
|
union perf_event event_copy;
|
|
struct perf_mmap *mmap;
|
|
struct pollfd *pollfd;
|
|
struct thread_map *threads;
|
|
struct cpu_map *cpus;
|
|
};
|
|
|
|
struct perf_evsel;
|
|
|
|
struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
|
|
struct thread_map *threads);
|
|
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
|
|
struct thread_map *threads);
|
|
void perf_evlist__exit(struct perf_evlist *evlist);
|
|
void perf_evlist__delete(struct perf_evlist *evlist);
|
|
|
|
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
|
|
int perf_evlist__add_default(struct perf_evlist *evlist);
|
|
|
|
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
|
|
int cpu, int thread, u64 id);
|
|
|
|
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
|
|
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
|
|
|
|
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
|
|
|
|
union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
|
|
|
|
int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
|
|
int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
|
|
void perf_evlist__munmap(struct perf_evlist *evlist);
|
|
|
|
static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
|
|
struct cpu_map *cpus,
|
|
struct thread_map *threads)
|
|
{
|
|
evlist->cpus = cpus;
|
|
evlist->threads = threads;
|
|
}
|
|
|
|
int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
|
|
pid_t target_tid, const char *cpu_list);
|
|
void perf_evlist__delete_maps(struct perf_evlist *evlist);
|
|
int perf_evlist__set_filters(struct perf_evlist *evlist);
|
|
|
|
#endif /* __PERF_EVLIST_H */
|