From cd7f355ac4067177e0a45e7331d11472bd7fd7ca Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 6 Jan 2017 14:18:08 -0800 Subject: [PATCH 01/13] perf jvmti: Create libdir directory before installing libperf-jvmti.so The install command for libperf-jvmti.so does not check if libdir exists before installing. This means that when the install command is run: install libperf-jvmti.so '/tmp/test_root/usr/lib64'; libperf-jvmti.so will get installed to /usr/lib64 as a file and break further installation. Fix this by ensuring the directory gets created first. See https://bugzilla.redhat.com/show_bug.cgi?id=1410296 Signed-off-by: Laura Abbott Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: d4dfdf00d43e ("perf jvmti: Plug compilation into perf build") Link: http://lkml.kernel.org/r/1483741088-13543-1-git-send-email-labbott@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8bb16aa9d661..4da19b6ba94a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -661,6 +661,7 @@ ifndef NO_PERF_READ_VDSOX32 endif ifndef NO_JVMTI $(call QUIET_INSTALL, $(LIBJVMTI)) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ $(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)'; endif $(call QUIET_INSTALL, libexec) \ From d5f805c09620dadc1a7806fbd46189d183f6c395 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 12:44:05 -0300 Subject: [PATCH 02/13] tools lib subcmd: Add missing linux/kernel.h include to subcmd.h As it was getting the BUILD_BUG_ON_ZERO() definition by luck. 
Cc: Jiri Olsa Cc: Wang Nan Cc: Josh Poimboeuf Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-dh71o31ar72ajck8o2x4aoae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/subcmd/parse-options.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 11c3be3bcce7..37e2d1a6fc2a 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -1,6 +1,7 @@ #ifndef __SUBCMD_PARSE_OPTIONS_H #define __SUBCMD_PARSE_OPTIONS_H +#include #include #include From 7d132caaf9392853ad637c8e6e53333cbeb99aa5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 15:31:48 -0300 Subject: [PATCH 03/13] perf machine: Add a kallsyms loading constructor To reduce the boilerplate for searching for functions in the running kernel and modules. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-93iqzayafpaxaguoiwjqezgz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 19 +++++++++++++++++++ tools/perf/util/machine.h | 1 + 2 files changed, 20 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9b33bef54581..747a034d1ff3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -87,6 +87,25 @@ out_delete: return NULL; } +struct machine *machine__new_kallsyms(void) +{ + struct machine *machine = machine__new_host(); + /* + * FIXME: + * 1) MAP__FUNCTION will go away when we stop loading separate maps for + * functions and data objects. + * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely + * ask for not using the kcore parsing code, once this one is fixed + * to create a map per module. 
+ */ + if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + machine__delete(machine); + machine = NULL; + } + + return machine; +} + static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 354de6e56109..a28305029711 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -129,6 +129,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); +struct machine *machine__new_kallsyms(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); From 355637717d575f14169954c3ed31536d45778f08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Jan 2017 15:33:32 -0300 Subject: [PATCH 04/13] perf kallsyms: Introduce tool to look for extended symbol information on the running kernel It's similar to doing grep on /proc/kallsyms, but it also shows extra information like the path to the kernel module and the unrelocated addresses in it, to help in diagnosing problems. It also helps demonstrate the use of the symbols routines so that tool writers can use them more effectively. 
Using it: $ perf kallsyms e1000_xmit_frame netif_rx usb_stor_set_xfer_buf e1000_xmit_frame: [e1000e] /lib/modules/4.9.0+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko 0xffffffffc046fc10-0xffffffffc0470bb0 (0x19c80-0x1ac20) netif_rx: [kernel] [kernel.kallsyms] 0xffffffff916f03a0-0xffffffff916f0410 (0xffffffff916f03a0-0xffffffff916f0410) usb_stor_set_xfer_buf: [usb_storage] /lib/modules/4.9.0+/kernel/drivers/usb/storage/usb-storage.ko 0xffffffffc057aea0-0xffffffffc057af19 (0xf10-0xf89) $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-79bk9pakujn4l4vq0f90klv3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 1 + tools/perf/Documentation/perf-kallsyms.txt | 24 ++++++++ tools/perf/builtin-help.c | 2 +- tools/perf/builtin-kallsyms.c | 67 ++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + 7 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-kallsyms.txt create mode 100644 tools/perf/builtin-kallsyms.c diff --git a/tools/perf/Build b/tools/perf/Build index b12d5d1666e3..0b48806f93c2 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -7,6 +7,7 @@ perf-y += builtin-help.o perf-y += builtin-sched.o perf-y += builtin-buildid-list.o perf-y += builtin-buildid-cache.o +perf-y += builtin-kallsyms.o perf-y += builtin-list.o perf-y += builtin-record.o perf-y += builtin-report.o diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt new file mode 100644 index 000000000000..954ea9e21236 --- /dev/null +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -0,0 +1,24 @@ +perf-kallsyms(1) +============== + +NAME +---- +perf-kallsyms - Searches running kernel for symbols + +SYNOPSIS +-------- +[verse] +'perf kallsyms symbol_name[,symbol_name...]' + +DESCRIPTION +----------- +This command searches the running kernel kallsyms 
file for the given symbol(s) +and prints information about it, including the DSO, the kallsyms begin/end +addresses and the addresses in the ELF kallsyms symbol table (for symbols in +modules). + +OPTIONS +------- +-v:: +--verbose=:: + Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 3bdb2c78a21b..93da24a638be 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -434,7 +434,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) const char * const builtin_help_subcommands[] = { "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", "record", "report", "bench", "stat", "timechart", "top", "annotate", - "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", + "script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data", #ifdef HAVE_LIBELF_SUPPORT "probe", #endif diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c new file mode 100644 index 000000000000..224bfc454b4a --- /dev/null +++ b/tools/perf/builtin-kallsyms.c @@ -0,0 +1,67 @@ +/* + * builtin-kallsyms.c + * + * Builtin command: Look for a symbol in the running kernel and its modules + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ +#include "builtin.h" +#include +#include +#include "debug.h" +#include "machine.h" +#include "symbol.h" + +static int __cmd_kallsyms(int argc, const char **argv) +{ + int i; + struct machine *machine = machine__new_kallsyms(); + + if (machine == NULL) { + pr_err("Couldn't read /proc/kallsyms\n"); + return -1; + } + + for (i = 0; i < argc; ++i) { + struct map *map; + struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); + + if (symbol == NULL) { + printf("%s: not found\n", argv[i]); + continue; + } + + printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n", + symbol->name, map->dso->short_name, map->dso->long_name, + map->unmap_ip(map, symbol->start), map->unmap_ip(map, symbol->end), + symbol->start, symbol->end); + } + + machine__delete(machine); + return 0; +} + +int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + const char * const kallsyms_usage[] = { + "perf kallsyms [] symbol_name", + NULL + }; + + argc = parse_options(argc, argv, options, kallsyms_usage, 0); + if (argc < 1) + usage_with_options(kallsyms_usage, options); + + symbol_conf.sort_by_name = true; + symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); + if (symbol__init(NULL) < 0) + return -1; + + return __cmd_kallsyms(argc, argv); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 0bcf68e98ccc..b55f5be486a1 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -23,6 +23,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix); int cmd_evlist(int argc, const char **argv, const char *prefix); int cmd_help(int argc, const char **argv, const char *prefix); int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_kallsyms(int argc, const char **argv, const char *prefix); int 
cmd_list(int argc, const char **argv, const char *prefix); int cmd_record(int argc, const char **argv, const char *prefix); int cmd_report(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ab5cbaa170d0..fb45613dba9e 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -12,6 +12,7 @@ perf-diff mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common +perf-kallsyms mainporcelain common perf-kmem mainporcelain common perf-kvm mainporcelain common perf-list mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aa23b3347d6b..13c8a7db055e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -47,6 +47,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, + { "kallsyms", cmd_kallsyms, 0 }, { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, From 017037ff3d0b11842012878a546fe2df47822259 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Jan 2017 17:26:26 -0300 Subject: [PATCH 05/13] perf trace: Allow specifying list of syscalls and events in -e/--expr/--event Makes it easier to specify both events and syscalls (to be formatted strace-like), i.e. previously one would have to do: # perf trace -e nanosleep --event sched:sched_switch usleep 1 Now it is possible to do: # perf trace -e nanosleep,sched:sched_switch usleep 1 0.000 ( 0.021 ms): usleep/17962 nanosleep(rqtp: 0x7ffdedd61ec0) ... 0.021 ( ): sched:sched_switch:usleep:17962 [120] S ==> swapper/1:0 [120]) 0.000 ( 0.066 ms): usleep/17962 ... [continued]: nanosleep()) = 0 # The old style --expr and using both -e and --event continue to work. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ieg6bakub4657l9e6afn85r4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 8 +- tools/perf/builtin-trace.c | 120 ++++++++++++++++++------ 2 files changed, 96 insertions(+), 32 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 781b019751a4..afd728672b6f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -35,7 +35,10 @@ OPTIONS -e:: --expr:: - List of syscalls to show, currently only syscall names. +--event:: + List of syscalls and other perf events (tracepoints, HW cache events, + etc) to show. + See 'perf list' for a complete list of events. Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. @@ -135,9 +138,6 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. ---event:: - Trace other events, see 'perf list' for a complete list. - --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 206bf72b77fc..40ef9b293d1b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -40,6 +40,7 @@ #include /* FIXME: Still needed for audit_errno_to_name */ #include +#include #include #include #include @@ -2699,6 +2700,91 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) evsel->handler = handler; } +/* + * XXX: Hackish, just splitting the combined -e+--event (syscalls + * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use + * existing facilities unchanged (trace->ev_qualifier + parse_options()). 
+ * + * It'd be better to introduce a parse_options() variant that would return a + * list with the terms it didn't match to an event... + */ +static int trace__parse_events_option(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct trace *trace = (struct trace *)opt->value; + const char *s = str; + char *sep = NULL, *lists[2] = { NULL, NULL, }; + int len = strlen(str), err = -1, list; + char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); + char group_name[PATH_MAX]; + + if (strace_groups_dir == NULL) + return -1; + + if (*s == '!') { + ++s; + trace->not_ev_qualifier = true; + } + + while (1) { + if ((sep = strchr(s, ',')) != NULL) + *sep = '\0'; + + list = 0; + if (syscalltbl__id(trace->sctbl, s) >= 0) { + list = 1; + } else { + path__join(group_name, sizeof(group_name), strace_groups_dir, s); + if (access(group_name, R_OK) == 0) + list = 1; + } + + if (lists[list]) { + sprintf(lists[list] + strlen(lists[list]), ",%s", s); + } else { + lists[list] = malloc(len); + if (lists[list] == NULL) + goto out; + strcpy(lists[list], s); + } + + if (!sep) + break; + + *sep = ','; + s = sep + 1; + } + + if (lists[1] != NULL) { + struct strlist_config slist_config = { + .dirname = strace_groups_dir, + }; + + trace->ev_qualifier = strlist__new(lists[1], &slist_config); + if (trace->ev_qualifier == NULL) { + fputs("Not enough memory to parse event qualifier", trace->output); + goto out; + } + + if (trace__validate_ev_qualifier(trace)) + goto out; + } + + err = 0; + + if (lists[0]) { + struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", + "event selector. 
use 'perf list' to list available events", + parse_events_option); + err = parse_events_option(&o, lists[0], 0); + } +out: + if (sep) + *sep = ','; + + return err; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char *trace_usage[] = { @@ -2730,15 +2816,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .max_stack = UINT_MAX, }; const char *output_name = NULL; - const char *ev_qualifier_str = NULL; const struct option trace_options[] = { - OPT_CALLBACK(0, "event", &trace.evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option), + OPT_CALLBACK('e', "event", &trace, "event", + "event/syscall selector. use 'perf list' to list available events", + trace__parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), - OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), + OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", + trace__parse_events_option), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", @@ -2863,7 +2949,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - if (!trace.trace_syscalls && ev_qualifier_str) { + if (!trace.trace_syscalls && trace.ev_qualifier) { pr_err("The -e option can't be used with --no-syscalls.\n"); goto out; } @@ -2878,28 +2964,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace.open_id = syscalltbl__id(trace.sctbl, "open"); - if (ev_qualifier_str != NULL) { - const char *s = ev_qualifier_str; - struct strlist_config slist_config = { - .dirname = system_path(STRACE_GROUPS_DIR), - }; - - trace.not_ev_qualifier = *s == '!'; - if 
(trace.not_ev_qualifier) - ++s; - trace.ev_qualifier = strlist__new(s, &slist_config); - if (trace.ev_qualifier == NULL) { - fputs("Not enough memory to parse event qualifier", - trace.output); - err = -ENOMEM; - goto out_close; - } - - err = trace__validate_ev_qualifier(&trace); - if (err) - goto out_close; - } - err = target__validate(&trace.opts.target); if (err) { target__strerror(&trace.opts.target, err, bf, sizeof(bf)); From e978be9ea2990f1af60fe10eadd2312a6250e0b8 Mon Sep 17 00:00:00 2001 From: Soramichi Akiyama Date: Tue, 10 Jan 2017 10:41:00 -0300 Subject: [PATCH 06/13] perf evlist: Fix typo in perf_evlist__start_workload() This patch fixes a typo: s/enable to/unable to/ Signed-off-by: Soramichi AKIYAMA Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: bcf3145fbeb1 ("perf evlist: Enhance perf_evlist__start_workload()") Link: http://lkml.kernel.org/r/20170110200006.e1f7a766b4faf1f107ae2e1b@m.soramichi.jp [ Wasn't applying, fixed it up by hand, added Fixes: tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d92e02006fb8..23e6f33edcf2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1797,7 +1797,7 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) */ ret = write(evlist->workload.cork_fd, &bf, 1); if (ret < 0) - perror("enable to write to pipe"); + perror("unable to write to pipe"); close(evlist->workload.cork_fd); return ret; From 9808143ba2e54818a3cf445d9b69b3f5f15451ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:55 +0100 Subject: [PATCH 07/13] perf tools: Add unit_number__scnprintf function Add unit_number__scnprintf function to display size units and use it in -m option info message. Before: $ perf record -m 10M ls rounding mmap pages size to 16777216 bytes (4096 pages) ... 
After: $ perf record -m 10M ls rounding mmap pages size to 16M (4096 pages) ... Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1483955520-29063-2-git-send-email-jolsa@kernel.org [ Rename it to unit_number__scnprintf for consistency ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 +++ tools/perf/tests/tests.h | 1 + tools/perf/tests/unit_number__scnprintf.c | 37 +++++++++++++++++++++++ tools/perf/util/evlist.c | 8 +++-- tools/perf/util/util.c | 13 ++++++++ tools/perf/util/util.h | 1 + 7 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tools/perf/tests/unit_number__scnprintf.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 6676c2dd6dcb..1cb3d9b540e9 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -44,6 +44,7 @@ perf-y += is_printable_array.o perf-y += bitmap.o perf-y += perf-hooks.o perf-y += clang.o +perf-y += unit_number__scnprintf.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a77dcc0d24e3..37e326bfd2dc 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -246,6 +246,10 @@ static struct test generic_tests[] = { .get_desc = test__clang_subtest_get_desc, } }, + { + .desc = "unit_number__scnprintf", + .func = test__unit_number__scnprint, + }, { .func = NULL, }, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index a512f0c8ff5b..1fa9b9d83aa5 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -96,6 +96,7 @@ int test__perf_hooks(int subtest); int test__clang(int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); +int test__unit_number__scnprint(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef 
HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c new file mode 100644 index 000000000000..623c2aa53c4a --- /dev/null +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -0,0 +1,37 @@ +#include +#include +#include "tests.h" +#include "util.h" +#include "debug.h" + +int test__unit_number__scnprint(int subtest __maybe_unused) +{ + struct { + u64 n; + const char *str; + } test[] = { + { 1, "1B" }, + { 10*1024, "10K" }, + { 20*1024*1024, "20M" }, + { 30*1024*1024*1024ULL, "30G" }, + { 0, "0B" }, + { 0, NULL }, + }; + unsigned i = 0; + + while (test[i].str) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), test[i].n); + + pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n", + test[i].n, test[i].str, buf); + + if (strcmp(test[i].str, buf)) + return TEST_FAIL; + + i++; + } + + return TEST_OK; +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 23e6f33edcf2..dc4df3d2660e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1224,12 +1224,16 @@ static long parse_pages_arg(const char *str, unsigned long min, if (pages == 0 && min == 0) { /* leave number of pages at 0 */ } else if (!is_power_of_2(pages)) { + char buf[100]; + /* round pages up to next power of 2 */ pages = roundup_pow_of_two(pages); if (!pages) return -EINVAL; - pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", - pages * page_size, pages); + + unit_number__scnprintf(buf, sizeof(buf), pages * page_size); + pr_info("rounding mmap pages size to %s (%lu pages)\n", + buf, pages); } if (pages > max) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9ddd98827d12..bf29aed16bd6 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -789,3 +789,16 @@ int is_printable_array(char *p, unsigned int len) } return 1; } + +int unit_number__scnprintf(char *buf, size_t size, u64 n) +{ + char unit[4] = "BKMG"; + int i = 0; + + while (((n / 1024) > 1) && (i < 3)) { + 
n /= 1024; + i++; + } + + return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1d639e38aa82..6e8be174ec0b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -363,4 +363,5 @@ int is_printable_array(char *p, unsigned int len); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); +int unit_number__scnprintf(char *buf, size_t size, u64 n); #endif /* GIT_COMPAT_UTIL_H */ From 1b43b70484a9617de5fe2c12e64bea006010ac1c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:56 +0100 Subject: [PATCH 08/13] perf record: Add struct switch_output Next patches will add more --switch-output option arguments, so preparing the data holder. Signed-off-by: Jiri Olsa Acked-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ec10e9427d9..f7e805b30527 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,10 @@ #include #include +struct switch_output { + bool signal; +}; + struct record { struct perf_tool tool; struct record_opts opts; @@ -62,7 +66,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; - bool switch_output; + struct switch_output switch_output; unsigned long long samples; }; @@ -842,11 +846,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); - if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.signal) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) 
trigger_on(&auxtrace_snapshot_trigger); - if (rec->switch_output) + if (rec->switch_output.signal) trigger_on(&switch_output_trigger); } else { signal(SIGUSR2, SIG_IGN); @@ -1519,7 +1523,7 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), - OPT_BOOLEAN(0, "switch-output", &record.switch_output, + OPT_BOOLEAN(0, "switch-output", &record.switch_output.signal, "Switch output when receive SIGUSR2"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), @@ -1578,7 +1582,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } - if (rec->switch_output) + if (rec->switch_output.signal) rec->timestamp_filename = true; if (!rec->itr) { @@ -1629,7 +1633,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); - } else if (rec->switch_output) { + } else if (rec->switch_output.signal) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching From cb4e1ebb6a398ff5b0067034b0d16566af4d78e8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:57 +0100 Subject: [PATCH 09/13] perf record: Change switch-output option to take optional argument Next patches will add --switch-output option arguments, changing the option to allow that and adding its default value to 'signal'. 
Signed-off-by: Jiri Olsa Acked-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f7e805b30527..2bf811acaf8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -48,6 +48,8 @@ struct switch_output { bool signal; + const char *str; + bool set; }; struct record { @@ -1356,6 +1358,22 @@ out_free: return ret; } +static int switch_output_setup(struct record *rec) +{ + struct switch_output *s = &rec->switch_output; + + if (!s->set) + return 0; + + if (!strcmp(s->str, "signal")) { + s->signal = true; + pr_debug("switch-output with SIGUSR2 signal\n"); + return 0; + } + + return -1; +} + static const char * const __record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -1523,8 +1541,9 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), - OPT_BOOLEAN(0, "switch-output", &record.switch_output.signal, - "Switch output when receive SIGUSR2"), + OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, + &record.switch_output.set, "signal", + "Switch output when receive SIGUSR2", "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), OPT_END() @@ -1582,6 +1601,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } + if (switch_output_setup(rec)) { + parse_options_usage(record_usage, record_options, "switch-output", 0); + return -EINVAL; + } + if (rec->switch_output.signal) rec->timestamp_filename = true; From dc0c6127c231d4d264570497a916fa19740c915b Mon Sep 
17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:58 +0100 Subject: [PATCH 10/13] perf record: Add switch-output size option argument It's now possible to specify the threshold size for perf.data like: $ perf record --switch-output=2G ... Once it's reached, the current data are dumped into the perf.data.<timestamp> file and the session goes on. $ perf record --switch-output=2G ... [ perf record: dump data: Woken up 7244 times ] [ perf record: Dump perf.data.2017010214093746 ] ... The size is expected to be a number with appended unit character - B/K/M/G. Signed-off-by: Jiri Olsa Acked-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 12 ++++- tools/perf/builtin-record.c | 67 +++++++++++++++++++----- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5054d9147f0f..3d55d2fd48b3 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -421,9 +421,17 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. ---switch-output:: +--switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one -when receiving a SIGUSR2. +based on 'mode' value: + "signal" - when receiving a SIGUSR2 (default value) or + <size> - when reaching the size threshold, size is expected to + be a number with appended unit character - B/K/M/G + + Note: the precision of the size threshold hugely depends + on your configuration - the number and size of your ring + buffers (-m). It is generally more precise for higher sizes + (like >5M), for lower values expect different sizes. 
A possible use case is to, given an external event, slice the perf.data file that gets then processed, possibly via a perf script, to decide if that diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2bf811acaf8d..3fa64492ee62 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -47,7 +47,9 @@ #include struct switch_output { + bool enabled; bool signal; + unsigned long size; const char *str; bool set; }; @@ -72,6 +74,23 @@ struct record { unsigned long long samples; }; +static volatile int auxtrace_record__snapshot_started; +static DEFINE_TRIGGER(auxtrace_snapshot_trigger); +static DEFINE_TRIGGER(switch_output_trigger); + +static bool switch_output_signal(struct record *rec) +{ + return rec->switch_output.signal && + trigger_is_ready(&switch_output_trigger); +} + +static bool switch_output_size(struct record *rec) +{ + return rec->switch_output.size && + trigger_is_ready(&switch_output_trigger) && + (rec->bytes_written >= rec->switch_output.size); +} + static int record__write(struct record *rec, void *bf, size_t size) { if (perf_data_file__write(rec->session->file, bf, size) < 0) { @@ -80,6 +99,10 @@ static int record__write(struct record *rec, void *bf, size_t size) } rec->bytes_written += size; + + if (switch_output_size(rec)) + trigger_hit(&switch_output_trigger); + return 0; } @@ -199,10 +222,6 @@ static volatile int done; static volatile int signr = -1; static volatile int child_finished; -static volatile int auxtrace_record__snapshot_started; -static DEFINE_TRIGGER(auxtrace_snapshot_trigger); -static DEFINE_TRIGGER(switch_output_trigger); - static void sig_handler(int sig) { if (sig == SIGCHLD) @@ -848,11 +867,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); - if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.signal) { + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, 
snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) trigger_on(&auxtrace_snapshot_trigger); - if (rec->switch_output.signal) + if (rec->switch_output.enabled) trigger_on(&switch_output_trigger); } else { signal(SIGUSR2, SIG_IGN); @@ -1361,6 +1380,14 @@ out_free: static int switch_output_setup(struct record *rec) { struct switch_output *s = &rec->switch_output; + static struct parse_tag tags_size[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + unsigned long val; if (!s->set) return 0; @@ -1368,10 +1395,22 @@ static int switch_output_setup(struct record *rec) if (!strcmp(s->str, "signal")) { s->signal = true; pr_debug("switch-output with SIGUSR2 signal\n"); - return 0; + goto enabled; + } + + val = parse_tag_value(s->str, tags_size); + if (val != (unsigned long) -1) { + s->size = val; + pr_debug("switch-output with %s size threshold\n", s->str); + goto enabled; } return -1; + +enabled: + rec->timestamp_filename = true; + s->enabled = true; + return 0; } static const char * const __record_usage[] = { @@ -1542,8 +1581,9 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, - &record.switch_output.set, "signal", - "Switch output when receive SIGUSR2", "signal"), + &record.switch_output.set, "signal,size", + "Switch output when receive SIGUSR2 or cross size threshold", + "signal"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), OPT_END() @@ -1606,9 +1646,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) return -EINVAL; } - if (rec->switch_output.signal) - rec->timestamp_filename = true; - if (!rec->itr) { rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) @@ -1657,7 +1694,7 @@ int cmd_record(int argc, const char **argv, 
const char *prefix __maybe_unused) if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); - } else if (rec->switch_output.signal) { + } else if (rec->switch_output.enabled) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching @@ -1749,6 +1786,8 @@ out: static void snapshot_sig_handler(int sig __maybe_unused) { + struct record *rec = &record; + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { trigger_hit(&auxtrace_snapshot_trigger); auxtrace_record__snapshot_started = 1; @@ -1756,6 +1795,6 @@ static void snapshot_sig_handler(int sig __maybe_unused) trigger_error(&auxtrace_snapshot_trigger); } - if (trigger_is_ready(&switch_output_trigger)) + if (switch_output_signal(rec)) trigger_hit(&switch_output_trigger); } From 0c5824498e8bd5b7d30dc03448cd89efaee4bead Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:51:59 +0100 Subject: [PATCH 11/13] perf record: Add switch-output size warning Adding switch-output size warning if the requested size is lower than the wakeup ring buffer size. $ perf record --switch-output=1K ls WARNING: switch-output data size lower than wakeup kernel buffer size (258K) expect bigger perf.data sizes ... 
Signed-off-by: Jiri Olsa Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1483955520-29063-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 21 +++++++++++++++++++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3fa64492ee62..93319e1be3ac 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1377,6 +1377,23 @@ out_free: return ret; } +static void switch_output_size_warn(struct record *rec) +{ + u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); + struct switch_output *s = &rec->switch_output; + + wakeup_size /= 2; + + if (s->size < wakeup_size) { + char buf[100]; + + unit_number__scnprintf(buf, sizeof(buf), wakeup_size); + pr_warning("WARNING: switch-output data size lower than " + "wakeup kernel buffer size (%s) " + "expect bigger perf.data sizes\n", buf); + } +} + static int switch_output_setup(struct record *rec) { struct switch_output *s = &rec->switch_output; @@ -1410,6 +1427,10 @@ static int switch_output_setup(struct record *rec) enabled: rec->timestamp_filename = true; s->enabled = true; + + if (s->size && !rec->opts.no_buffering) + switch_output_size_warn(rec); + return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dc4df3d2660e..b601f2814a30 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1184,7 +1184,7 @@ unsigned long perf_event_mlock_kb_in_pages(void) return pages; } -static size_t perf_evlist__mmap_size(unsigned long pages) +size_t perf_evlist__mmap_size(unsigned long pages) { if (pages == UINT_MAX) pages = perf_event_mlock_kb_in_pages(); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4fd034f22d2f..389b9ccdf8c7 100644 --- 
a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -218,6 +218,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); +size_t perf_evlist__mmap_size(unsigned long pages); + void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); void perf_evlist__toggle_enable(struct perf_evlist *evlist); From bfacbe3bf2443c805aec4c04ecb558d03d0d3ebc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jan 2017 10:52:00 +0100 Subject: [PATCH 12/13] perf record: Add switch-output time option argument It's now possible to specify the threshold time for perf.data like: $ perf record --switch-output=30s ... Once it's reached, the current data are dumped into the perf.data.<timestamp> file and the session goes on. $ perf record --switch-output=30s ... [ perf record: dump data: Woken up 44 times ] [ perf record: Dump perf.data.2017010213043746 ] ... The time is expected to be a number with appended unit character - s/m/h/d. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Acked-by: Wang Nan Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1483955520-29063-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/builtin-record.c | 44 ++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3d55d2fd48b3..27256bc68eda 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -427,6 +427,8 @@ based on 'mode' value: "signal" - when receiving a SIGUSR2 (default value) or - when reaching the size threshold, size is expected to be a number with appended unit character - B/K/M/G +