linux/arch/arm/kernel/perf_callchain.c
Arnaldo Carvalho de Melo c5dfd78eb7 perf core: Allow setting up max frame stack depth via sysctl
The default remains 127, which is good for most cases, and not even hit
most of the time, but then for some cases, as reported by Brendan, 1024+
deep frames are appearing on the radar for things like groovy, ruby.

And in some workloads putting a _lower_ cap on this may make sense. One
that is per event still needs to be put in place tho.

The new file is:

  # cat /proc/sys/kernel/perf_event_max_stack
  127

Chaging it:

  # echo 256 > /proc/sys/kernel/perf_event_max_stack
  # cat /proc/sys/kernel/perf_event_max_stack
  256

But as soon as there is some event using callchains we get:

  # echo 512 > /proc/sys/kernel/perf_event_max_stack
  -bash: echo: write error: Device or resource busy
  #

Because we only allocate the callchain percpu data structures when there
is a user, which allows for changing the max easily, its just a matter
of having no callchain users at that point.

Reported-and-Tested-by: Brendan Gregg <brendan.d.gregg@gmail.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/20160426002928.GB16708@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-27 10:20:39 -03:00

137 lines
3.1 KiB
C

/*
* ARM callchain support
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
* Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* This code is based on the ARM OProfile backtrace code.
*/
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
/*
* The registers we're interested in are at the end of the variable
* length saved register structure. The fp points at the end of this
* structure so the address of this struct is:
* (struct frame_tail *)(xxx->fp)-1
*
* This code has been adapted from the ARM OProfile support.
*/
struct frame_tail {
struct frame_tail __user *fp;
unsigned long sp;
unsigned long lr;
} __attribute__((packed));
/*
* Get the return address for a single stackframe and return a pointer to the
* next frame tail.
*/
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
struct perf_callchain_entry *entry)
{
struct frame_tail buftail;
unsigned long err;
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
return NULL;
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
perf_callchain_store(entry, buftail.lr);
/*
* Frame pointers should strictly progress back up the stack
* (towards higher addresses).
*/
if (tail + 1 >= buftail.fp)
return NULL;
return buftail.fp - 1;
}
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct frame_tail __user *tail;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->ARM_pc);
if (!current->mm)
return;
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < sysctl_perf_event_max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = user_backtrace(tail, entry);
}
/*
* Gets called by walk_stackframe() for every stackframe. This will be called
* whist unwinding the stackframe and is like a subroutine return so we use
* the PC.
*/
static int
callchain_trace(struct stackframe *fr,
void *data)
{
struct perf_callchain_entry *entry = data;
perf_callchain_store(entry, fr->pc);
return 0;
}
void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stackframe fr;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return perf_guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
int misc = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
return misc;
}