From 3912f2abc942a002ef611fc973add5e5eadb3432 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 14 Dec 2009 03:23:56 -0500 Subject: [PATCH 001/640] perf: Use format string of printf to align strings Instead of filling whitespaces to do alignment, use printf's format string. This simplifies the code a bit. Signed-off-by: WANG Cong Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091214082700.4224.57640.sendpatchset@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/builtin-help.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 9f810b17c25c..e427d6965e0c 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -286,8 +286,7 @@ void list_common_cmds_help(void) puts(" The most commonly used perf commands are:"); for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { - printf(" %s ", common_cmds[i].name); - mput_char(' ', longest - strlen(common_cmds[i].name)); + printf(" %-*s ", longest, common_cmds[i].name); puts(common_cmds[i].help); } } From 06aae590033d1ae3c35b2920ef950cfc603e2a2d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:36:59 -0200 Subject: [PATCH 002/640] perf session: Move the event processing routines to session.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need for an extra "data_map" file since the routines there operate mainly on a perf_session instance. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 - tools/perf/util/data_map.c | 252 ------------------------------------- tools/perf/util/session.c | 245 ++++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 253 deletions(-) delete mode 100644 tools/perf/util/data_map.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 652a470b5f74..4172c3b0e4a7 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -423,7 +423,6 @@ LIB_OBJS += util/trace-event-perl.o LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o -LIB_OBJS += util/data_map.o LIB_OBJS += util/probe-event.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c deleted file mode 100644 index b557b836de3d..000000000000 --- a/tools/perf/util/data_map.c +++ /dev/null @@ -1,252 +0,0 @@ -#include "symbol.h" -#include "util.h" -#include "debug.h" -#include "thread.h" -#include "session.h" - -static int process_event_stub(event_t *event __used, - struct perf_session *session __used) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) -{ - if (!handler->process_sample_event) - handler->process_sample_event = process_event_stub; - if (!handler->process_mmap_event) - handler->process_mmap_event = process_event_stub; - if (!handler->process_comm_event) - handler->process_comm_event = process_event_stub; - if (!handler->process_fork_event) - handler->process_fork_event = process_event_stub; - if (!handler->process_exit_event) - handler->process_exit_event = process_event_stub; - if (!handler->process_lost_event) - handler->process_lost_event = process_event_stub; - if (!handler->process_read_event) - handler->process_read_event = 
process_event_stub; - if (!handler->process_throttle_event) - handler->process_throttle_event = process_event_stub; - if (!handler->process_unthrottle_event) - handler->process_unthrottle_event = process_event_stub; -} - -static const char *event__name[] = { - [0] = "TOTAL", - [PERF_RECORD_MMAP] = "MMAP", - [PERF_RECORD_LOST] = "LOST", - [PERF_RECORD_COMM] = "COMM", - [PERF_RECORD_EXIT] = "EXIT", - [PERF_RECORD_THROTTLE] = "THROTTLE", - [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", - [PERF_RECORD_FORK] = "FORK", - [PERF_RECORD_READ] = "READ", - [PERF_RECORD_SAMPLE] = "SAMPLE", -}; - -unsigned long event__total[PERF_RECORD_MAX]; - -void event__print_totals(void) -{ - int i; - for (i = 0; i < PERF_RECORD_MAX; ++i) - pr_info("%10s events: %10ld\n", - event__name[i], event__total[i]); -} - -static int process_event(event_t *event, struct perf_session *session, - struct perf_event_ops *ops, - unsigned long offset, unsigned long head) -{ - trace_event(event); - - if (event->header.type < PERF_RECORD_MAX) { - dump_printf("%p [%p]: PERF_RECORD_%s", - (void *)(offset + head), - (void *)(long)(event->header.size), - event__name[event->header.type]); - ++event__total[0]; - ++event__total[event->header.type]; - } - - switch (event->header.type) { - case PERF_RECORD_SAMPLE: - return ops->process_sample_event(event, session); - case PERF_RECORD_MMAP: - return ops->process_mmap_event(event, session); - case PERF_RECORD_COMM: - return ops->process_comm_event(event, session); - case PERF_RECORD_FORK: - return ops->process_fork_event(event, session); - case PERF_RECORD_EXIT: - return ops->process_exit_event(event, session); - case PERF_RECORD_LOST: - return ops->process_lost_event(event, session); - case PERF_RECORD_READ: - return ops->process_read_event(event, session); - case PERF_RECORD_THROTTLE: - return ops->process_throttle_event(event, session); - case PERF_RECORD_UNTHROTTLE: - return ops->process_unthrottle_event(event, session); - default: - ops->total_unknown++; - return -1; - 
} -} - -int perf_header__read_build_ids(int input, u64 offset, u64 size) -{ - struct build_id_event bev; - char filename[PATH_MAX]; - u64 limit = offset + size; - int err = -1; - - while (offset < limit) { - struct dso *dso; - ssize_t len; - - if (read(input, &bev, sizeof(bev)) != sizeof(bev)) - goto out; - - len = bev.header.size - sizeof(bev); - if (read(input, filename, len) != len) - goto out; - - dso = dsos__findnew(filename); - if (dso != NULL) - dso__set_build_id(dso, &bev.build_id); - - offset += bev.header.size; - } - err = 0; -out: - return err; -} - -static struct thread *perf_session__register_idle_thread(struct perf_session *self) -{ - struct thread *thread = perf_session__findnew(self, 0); - - if (!thread || thread__set_comm(thread, "swapper")) { - pr_err("problem inserting idle task.\n"); - thread = NULL; - } - - return thread; -} - -int perf_session__process_events(struct perf_session *self, - struct perf_event_ops *ops) -{ - int err; - unsigned long head, shift; - unsigned long offset = 0; - size_t page_size; - event_t *event; - uint32_t size; - char *buf; - - if (perf_session__register_idle_thread(self) == NULL) - return -ENOMEM; - - perf_event_ops__fill_defaults(ops); - - page_size = getpagesize(); - - head = self->header.data_offset; - self->sample_type = perf_header__sample_type(&self->header); - - err = -EINVAL; - if (ops->sample_type_check && ops->sample_type_check(self) < 0) - goto out_err; - - if (!ops->full_paths) { - char bf[PATH_MAX]; - - if (getcwd(bf, sizeof(bf)) == NULL) { - err = -errno; -out_getcwd_err: - pr_err("failed to get the current directory\n"); - goto out_err; - } - self->cwd = strdup(bf); - if (self->cwd == NULL) { - err = -ENOMEM; - goto out_getcwd_err; - } - self->cwdlen = strlen(self->cwd); - } - - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = mmap(NULL, page_size * self->mmap_window, PROT_READ, - MAP_SHARED, self->fd, offset); - if (buf == MAP_FAILED) { - pr_err("failed 
to mmap file\n"); - err = -errno; - goto out_err; - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * self->mmap_window) { - int munmap_ret; - - shift = page_size * (head / page_size); - - munmap_ret = munmap(buf, page_size * self->mmap_window); - assert(munmap_ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dump_printf("\n%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, self, ops, offset, head) < 0) { - - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head >= self->header.data_offset + self->header.data_size) - goto done; - - if (offset + head < self->size) - goto more; - -done: - err = 0; -out_err: - return err; -} diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ce3a6c8abe76..736d4fda9272 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -148,3 +148,248 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self, return syms; } + +static int process_event_stub(event_t *event __used, + struct perf_session *session __used) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) +{ + if (handler->process_sample_event == NULL) + handler->process_sample_event = process_event_stub; + if (handler->process_mmap_event == NULL) + handler->process_mmap_event = process_event_stub; + if (handler->process_comm_event == NULL) + handler->process_comm_event = 
process_event_stub; + if (handler->process_fork_event == NULL) + handler->process_fork_event = process_event_stub; + if (handler->process_exit_event == NULL) + handler->process_exit_event = process_event_stub; + if (handler->process_lost_event == NULL) + handler->process_lost_event = process_event_stub; + if (handler->process_read_event == NULL) + handler->process_read_event = process_event_stub; + if (handler->process_throttle_event == NULL) + handler->process_throttle_event = process_event_stub; + if (handler->process_unthrottle_event == NULL) + handler->process_unthrottle_event = process_event_stub; +} + +static const char *event__name[] = { + [0] = "TOTAL", + [PERF_RECORD_MMAP] = "MMAP", + [PERF_RECORD_LOST] = "LOST", + [PERF_RECORD_COMM] = "COMM", + [PERF_RECORD_EXIT] = "EXIT", + [PERF_RECORD_THROTTLE] = "THROTTLE", + [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", + [PERF_RECORD_FORK] = "FORK", + [PERF_RECORD_READ] = "READ", + [PERF_RECORD_SAMPLE] = "SAMPLE", +}; + +unsigned long event__total[PERF_RECORD_MAX]; + +void event__print_totals(void) +{ + int i; + for (i = 0; i < PERF_RECORD_MAX; ++i) + pr_info("%10s events: %10ld\n", + event__name[i], event__total[i]); +} + +static int perf_session__process_event(struct perf_session *self, + event_t *event, + struct perf_event_ops *ops, + unsigned long offset, unsigned long head) +{ + trace_event(event); + + if (event->header.type < PERF_RECORD_MAX) { + dump_printf("%p [%p]: PERF_RECORD_%s", + (void *)(offset + head), + (void *)(long)(event->header.size), + event__name[event->header.type]); + ++event__total[0]; + ++event__total[event->header.type]; + } + + switch (event->header.type) { + case PERF_RECORD_SAMPLE: + return ops->process_sample_event(event, self); + case PERF_RECORD_MMAP: + return ops->process_mmap_event(event, self); + case PERF_RECORD_COMM: + return ops->process_comm_event(event, self); + case PERF_RECORD_FORK: + return ops->process_fork_event(event, self); + case PERF_RECORD_EXIT: + return 
ops->process_exit_event(event, self); + case PERF_RECORD_LOST: + return ops->process_lost_event(event, self); + case PERF_RECORD_READ: + return ops->process_read_event(event, self); + case PERF_RECORD_THROTTLE: + return ops->process_throttle_event(event, self); + case PERF_RECORD_UNTHROTTLE: + return ops->process_unthrottle_event(event, self); + default: + ops->total_unknown++; + return -1; + } +} + +int perf_header__read_build_ids(int input, u64 offset, u64 size) +{ + struct build_id_event bev; + char filename[PATH_MAX]; + u64 limit = offset + size; + int err = -1; + + while (offset < limit) { + struct dso *dso; + ssize_t len; + + if (read(input, &bev, sizeof(bev)) != sizeof(bev)) + goto out; + + len = bev.header.size - sizeof(bev); + if (read(input, filename, len) != len) + goto out; + + dso = dsos__findnew(filename); + if (dso != NULL) + dso__set_build_id(dso, &bev.build_id); + + offset += bev.header.size; + } + err = 0; +out: + return err; +} + +static struct thread *perf_session__register_idle_thread(struct perf_session *self) +{ + struct thread *thread = perf_session__findnew(self, 0); + + if (thread == NULL || thread__set_comm(thread, "swapper")) { + pr_err("problem inserting idle task.\n"); + thread = NULL; + } + + return thread; +} + +int perf_session__process_events(struct perf_session *self, + struct perf_event_ops *ops) +{ + int err; + unsigned long head, shift; + unsigned long offset = 0; + size_t page_size; + event_t *event; + uint32_t size; + char *buf; + + if (perf_session__register_idle_thread(self) == NULL) + return -ENOMEM; + + perf_event_ops__fill_defaults(ops); + + page_size = getpagesize(); + + head = self->header.data_offset; + self->sample_type = perf_header__sample_type(&self->header); + + err = -EINVAL; + if (ops->sample_type_check && ops->sample_type_check(self) < 0) + goto out_err; + + if (!ops->full_paths) { + char bf[PATH_MAX]; + + if (getcwd(bf, sizeof(bf)) == NULL) { + err = -errno; +out_getcwd_err: + pr_err("failed to get the 
current directory\n"); + goto out_err; + } + self->cwd = strdup(bf); + if (self->cwd == NULL) { + err = -ENOMEM; + goto out_getcwd_err; + } + self->cwdlen = strlen(self->cwd); + } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + +remap: + buf = mmap(NULL, page_size * self->mmap_window, PROT_READ, + MAP_SHARED, self->fd, offset); + if (buf == MAP_FAILED) { + pr_err("failed to mmap file\n"); + err = -errno; + goto out_err; + } + +more: + event = (event_t *)(buf + head); + + size = event->header.size; + if (size == 0) + size = 8; + + if (head + event->header.size >= page_size * self->mmap_window) { + int munmap_ret; + + shift = page_size * (head / page_size); + + munmap_ret = munmap(buf, page_size * self->mmap_window); + assert(munmap_ret == 0); + + offset += shift; + head -= shift; + goto remap; + } + + size = event->header.size; + + dump_printf("\n%p [%p]: event: %d\n", + (void *)(offset + head), + (void *)(long)event->header.size, + event->header.type); + + if (size == 0 || + perf_session__process_event(self, event, ops, offset, head) < 0) { + dump_printf("%p [%p]: skipping unknown header type: %d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type); + /* + * assume we lost track of the stream, check alignment, and + * increment a single u64 in the hope to catch on again 'soon'. 
+ */ + if (unlikely(head & 7)) + head &= ~7ULL; + + size = 8; + } + + head += size; + + if (offset + head >= self->header.data_offset + self->header.data_size) + goto done; + + if (offset + head < self->size) + goto more; +done: + err = 0; +out_err: + return err; +} From 4a58e61161074776aa34187ea369414ce4852394 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:00 -0200 Subject: [PATCH 003/640] perf tools: Move the map class definition to a separate header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And this resulted in the need for adding some missing includes in some places that were getting the definitions needed out of sheer luck. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/util/debug.c | 1 + tools/perf/util/event.h | 65 ++---------------------------- tools/perf/util/map.h | 73 ++++++++++++++++++++++++++++++++++ tools/perf/util/probe-finder.h | 2 + 5 files changed, 80 insertions(+), 62 deletions(-) create mode 100644 tools/perf/util/map.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4172c3b0e4a7..fafea0b6f323 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -357,6 +357,7 @@ LIB_H += util/event.h LIB_H += util/exec_cmd.h LIB_H += util/types.h LIB_H += util/levenshtein.h +LIB_H += util/map.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h LIB_H += util/quote.h diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 28d520d5a1fb..0905600c3851 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -9,6 +9,7 @@ #include "color.h" #include "event.h" #include "debug.h" +#include "util.h" int verbose = 0; int dump_trace = 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 
690a96d0467c..80fb3653c809 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,10 +1,10 @@ #ifndef __PERF_RECORD_H #define __PERF_RECORD_H +#include + #include "../perf.h" -#include "util.h" -#include -#include +#include "map.h" /* * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * @@ -101,67 +101,8 @@ struct events_stats { void event__print_totals(void); -enum map_type { - MAP__FUNCTION = 0, - MAP__VARIABLE, -}; - -#define MAP__NR_TYPES (MAP__VARIABLE + 1) - -struct map { - union { - struct rb_node rb_node; - struct list_head node; - }; - u64 start; - u64 end; - enum map_type type; - u64 pgoff; - u64 (*map_ip)(struct map *, u64); - u64 (*unmap_ip)(struct map *, u64); - struct dso *dso; -}; - -static inline u64 map__map_ip(struct map *map, u64 ip) -{ - return ip - map->start + map->pgoff; -} - -static inline u64 map__unmap_ip(struct map *map, u64 ip) -{ - return ip + map->start - map->pgoff; -} - -static inline u64 identity__map_ip(struct map *map __used, u64 ip) -{ - return ip; -} - -struct symbol; - -typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); - -void map__init(struct map *self, enum map_type type, - u64 start, u64 end, u64 pgoff, struct dso *dso); -struct map *map__new(struct mmap_event *event, enum map_type, - char *cwd, int cwdlen); -void map__delete(struct map *self); -struct map *map__clone(struct map *self); -int map__overlap(struct map *l, struct map *r); -size_t map__fprintf(struct map *self, FILE *fp); - struct perf_session; -int map__load(struct map *self, struct perf_session *session, - symbol_filter_t filter); -struct symbol *map__find_symbol(struct map *self, struct perf_session *session, - u64 addr, symbol_filter_t filter); -struct symbol *map__find_symbol_by_name(struct map *self, const char *name, - struct perf_session *session, - symbol_filter_t filter); -void map__fixup_start(struct map *self); -void map__fixup_end(struct map *self); - int event__synthesize_thread(pid_t pid, int (*process)(event_t *event, struct 
perf_session *session), diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h new file mode 100644 index 000000000000..72f0b6ab5ea5 --- /dev/null +++ b/tools/perf/util/map.h @@ -0,0 +1,73 @@ +#ifndef __PERF_MAP_H +#define __PERF_MAP_H + +#include +#include +#include +#include + +enum map_type { + MAP__FUNCTION = 0, + MAP__VARIABLE, +}; + +#define MAP__NR_TYPES (MAP__VARIABLE + 1) + +struct dso; + +struct map { + union { + struct rb_node rb_node; + struct list_head node; + }; + u64 start; + u64 end; + enum map_type type; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); + u64 (*unmap_ip)(struct map *, u64); + struct dso *dso; +}; + +static inline u64 map__map_ip(struct map *map, u64 ip) +{ + return ip - map->start + map->pgoff; +} + +static inline u64 map__unmap_ip(struct map *map, u64 ip) +{ + return ip + map->start - map->pgoff; +} + +static inline u64 identity__map_ip(struct map *map __used, u64 ip) +{ + return ip; +} + +struct symbol; +struct mmap_event; + +typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); + +void map__init(struct map *self, enum map_type type, + u64 start, u64 end, u64 pgoff, struct dso *dso); +struct map *map__new(struct mmap_event *event, enum map_type, + char *cwd, int cwdlen); +void map__delete(struct map *self); +struct map *map__clone(struct map *self); +int map__overlap(struct map *l, struct map *r); +size_t map__fprintf(struct map *self, FILE *fp); + +struct perf_session; + +int map__load(struct map *self, struct perf_session *session, + symbol_filter_t filter); +struct symbol *map__find_symbol(struct map *self, struct perf_session *session, + u64 addr, symbol_filter_t filter); +struct symbol *map__find_symbol_by_name(struct map *self, const char *name, + struct perf_session *session, + symbol_filter_t filter); +void map__fixup_start(struct map *self); +void map__fixup_end(struct map *self); + +#endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 
a4086aaddb73..e3f396806e6e 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -1,6 +1,8 @@ #ifndef _PROBE_FINDER_H #define _PROBE_FINDER_H +#include "util.h" + #define MAX_PATH_LEN 256 #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 From 27295592c22e71bbd38110c302da8dbb43912a60 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:01 -0200 Subject: [PATCH 004/640] perf session: Share the common trace sample_check routine as perf_session__has_traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 14 +------------- tools/perf/builtin-sched.c | 14 +------------- tools/perf/builtin-timechart.c | 13 +------------ tools/perf/builtin-trace.c | 14 +------------- tools/perf/util/session.c | 11 +++++++++++ tools/perf/util/session.h | 2 ++ 6 files changed, 17 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fc21ad79dd83..a85936f09f3e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -342,22 +342,10 @@ static int process_sample_event(event_t *event, struct perf_session *session) return 0; } -static int sample_type_check(struct perf_session *session) -{ - if (!(session->sample_type & PERF_SAMPLE_RAW)) { - fprintf(stderr, - "No trace sample to read. 
Did you call perf record " - "without -R?"); - return -1; - } - - return 0; -} - static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, - .sample_type_check = sample_type_check, + .sample_type_check = perf_session__has_traces, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 80209df6cfe8..d65098c42990 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1653,23 +1653,11 @@ static int process_lost_event(event_t *event __used, return 0; } -static int sample_type_check(struct perf_session *session __used) -{ - if (!(session->sample_type & PERF_SAMPLE_RAW)) { - fprintf(stderr, - "No trace sample to read. Did you call perf record " - "without -R?"); - return -1; - } - - return 0; -} - static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, .process_lost_event = process_lost_event, - .sample_type_check = sample_type_check, + .sample_type_check = perf_session__has_traces, }; static int read_events(void) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index a589a43112d6..b42f337c17d9 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1029,23 +1029,12 @@ static void process_samples(struct perf_session *session) } } -static int sample_type_check(struct perf_session *session) -{ - if (!(session->sample_type & PERF_SAMPLE_RAW)) { - fprintf(stderr, "No trace samples found in the file.\n" - "Have you used 'perf timechart record' to record it?\n"); - return -1; - } - - return 0; -} - static struct perf_event_ops event_ops = { .process_comm_event = process_comm_event, .process_fork_event = process_fork_event, .process_exit_event = process_exit_event, .process_sample_event = queue_sample_event, - .sample_type_check = sample_type_check, + 
.sample_type_check = perf_session__has_traces, }; static int __cmd_timechart(void) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 574a215e800b..b0ba2ac37e2c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -103,22 +103,10 @@ static int process_sample_event(event_t *event, struct perf_session *session) return 0; } -static int sample_type_check(struct perf_session *session) -{ - if (!(session->sample_type & PERF_SAMPLE_RAW)) { - fprintf(stderr, - "No trace sample to read. Did you call perf record " - "without -R?"); - return -1; - } - - return 0; -} - static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, - .sample_type_check = sample_type_check, + .sample_type_check = perf_session__has_traces, }; static int __cmd_trace(struct perf_session *session) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 736d4fda9272..60eab8b3ff34 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -393,3 +393,14 @@ done: out_err: return err; } + +int perf_session__has_traces(struct perf_session *self) +{ + if (!(self->sample_type & PERF_SAMPLE_RAW)) { + pr_err("No trace sample to read. 
Did you call perf record " + "without -R?"); + return -1; + } + + return 0; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 32eaa1bada06..a6951d2f700f 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -56,6 +56,8 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self, struct ip_callchain *chain, struct symbol **parent); +int perf_session__has_traces(struct perf_session *self); + int perf_header__read_build_ids(int input, u64 offset, u64 file_size); #endif /* __PERF_SESSION_H */ From d549c7690190d9739005e19604faad6da4b802ac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:02 -0200 Subject: [PATCH 005/640] perf session: Remove sample_type_check from event_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is really something tools need to do before asking for the events to be processed, leaving perf_session__process_events to do just that, process events. Also add a msg parameter to perf_session__has_traces() so that the right message can be printed, fixing a regression added by me in the previous cset (right timechart message) and also fixing 'perf kmem', that was not asking if 'perf kmem record' was run. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 6 ++++-- tools/perf/builtin-report.c | 16 +++++++++------- tools/perf/builtin-sched.c | 7 ++++--- tools/perf/builtin-timechart.c | 6 ++++-- tools/perf/builtin-trace.c | 4 +++- tools/perf/util/session.c | 16 ++++++---------- tools/perf/util/session.h | 3 +-- 7 files changed, 31 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a85936f09f3e..73b065022e27 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -345,7 +345,6 @@ static int process_sample_event(event_t *event, struct perf_session *session) static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, - .sample_type_check = perf_session__has_traces, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -492,11 +491,14 @@ static void sort_result(void) static int __cmd_kmem(void) { - int err; + int err = -EINVAL; struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); if (session == NULL) return -ENOMEM; + if (!perf_session__has_traces(session, "kmem record")) + goto out_delete; + setup_pager(); err = perf_session__process_events(session, &event_ops); if (err != 0) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index db10c0e8ecae..08259184cedb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -156,14 +156,14 @@ static int process_read_event(event_t *event, struct perf_session *session __use return 0; } -static int sample_type_check(struct perf_session *session) +static int perf_session__setup_sample_type(struct perf_session *self) { - if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (!(self->sample_type & 
PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { fprintf(stderr, "selected --sort parent, but no" " callchain data. Did you call" " perf record without -g?\n"); - return -1; + return -EINVAL; } if (symbol_conf.use_callchain) { fprintf(stderr, "selected -g but no callchain data." @@ -176,7 +176,7 @@ static int sample_type_check(struct perf_session *session) if (register_callchain_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain" " params\n"); - return -1; + return -EINVAL; } } @@ -191,13 +191,11 @@ static struct perf_event_ops event_ops = { .process_fork_event = event__process_task, .process_lost_event = event__process_lost, .process_read_event = process_read_event, - .sample_type_check = sample_type_check, }; - static int __cmd_report(void) { - int ret; + int ret = -EINVAL; struct perf_session *session; session = perf_session__new(input_name, O_RDONLY, force); @@ -207,6 +205,10 @@ static int __cmd_report(void) if (show_threads) perf_read_values_init(&show_threads_values); + ret = perf_session__setup_sample_type(session); + if (ret) + goto out_delete; + ret = perf_session__process_events(session, &event_ops); if (ret) goto out_delete; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d65098c42990..e862e71f4e68 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1657,17 +1657,18 @@ static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, .process_lost_event = process_lost_event, - .sample_type_check = perf_session__has_traces, }; static int read_events(void) { - int err; + int err = -EINVAL; struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); if (session == NULL) return -ENOMEM; - err = perf_session__process_events(session, &event_ops); + if (perf_session__has_traces(session, "record -R")) + err = perf_session__process_events(session, &event_ops); + perf_session__delete(session); return err; } 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index b42f337c17d9..825283794985 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1034,17 +1034,19 @@ static struct perf_event_ops event_ops = { .process_fork_event = process_fork_event, .process_exit_event = process_exit_event, .process_sample_event = queue_sample_event, - .sample_type_check = perf_session__has_traces, }; static int __cmd_timechart(void) { struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); - int ret; + int ret = -EINVAL; if (session == NULL) return -ENOMEM; + if (!perf_session__has_traces(session, "timechart record")) + goto out_delete; + ret = perf_session__process_events(session, &event_ops); if (ret) goto out_delete; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b0ba2ac37e2c..e94f34631585 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -106,7 +106,6 @@ static int process_sample_event(event_t *event, struct perf_session *session) static struct perf_event_ops event_ops = { .process_sample_event = process_sample_event, .process_comm_event = event__process_comm, - .sample_type_check = perf_session__has_traces, }; static int __cmd_trace(struct perf_session *session) @@ -580,6 +579,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) if (session == NULL) return -ENOMEM; + if (!perf_session__has_traces(session, "record -R")) + return -EINVAL; + if (generate_script_lang) { struct stat perf_stat; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 60eab8b3ff34..bc84a5217955 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -73,6 +73,8 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc if (mode == O_RDONLY && perf_session__open(self, force) < 0) goto out_delete; + + self->sample_type = perf_header__sample_type(&self->header); out: return self; out_free: @@ -302,11 
+304,6 @@ int perf_session__process_events(struct perf_session *self, page_size = getpagesize(); head = self->header.data_offset; - self->sample_type = perf_header__sample_type(&self->header); - - err = -EINVAL; - if (ops->sample_type_check && ops->sample_type_check(self) < 0) - goto out_err; if (!ops->full_paths) { char bf[PATH_MAX]; @@ -394,13 +391,12 @@ out_err: return err; } -int perf_session__has_traces(struct perf_session *self) +bool perf_session__has_traces(struct perf_session *self, const char *msg) { if (!(self->sample_type & PERF_SAMPLE_RAW)) { - pr_err("No trace sample to read. Did you call perf record " - "without -R?"); - return -1; + pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg); + return false; } - return 0; + return true; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index a6951d2f700f..5771ccb3fe03 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -40,7 +40,6 @@ struct perf_event_ops { event_op process_read_event; event_op process_throttle_event; event_op process_unthrottle_event; - int (*sample_type_check)(struct perf_session *session); unsigned long total_unknown; bool full_paths; }; @@ -56,7 +55,7 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self, struct ip_callchain *chain, struct symbol **parent); -int perf_session__has_traces(struct perf_session *self); +bool perf_session__has_traces(struct perf_session *self, const char *msg); int perf_header__read_build_ids(int input, u64 offset, u64 file_size); From 31d337c4ee3152b7271897eae576251643f5a3b5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:03 -0200 Subject: [PATCH 006/640] perf session: Move total_unknown to perf_session->unknown events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As this is a session property, not belonging to perf_event_ops, that can be shared by many perf_session instances. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-7-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/session.c | 3 ++- tools/perf/util/session.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bc84a5217955..4ca427f73994 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -66,6 +66,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc self->mmap_window = 32; self->cwd = NULL; self->cwdlen = 0; + self->unknown_events = 0; map_groups__init(&self->kmaps); if (perf_session__create_kernel_maps(self) < 0) @@ -239,7 +240,7 @@ static int perf_session__process_event(struct perf_session *self, case PERF_RECORD_UNTHROTTLE: return ops->process_unthrottle_event(event, self); default: - ops->total_unknown++; + self->unknown_events++; return -1; } } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5771ccb3fe03..585937b6f9ee 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,7 @@ struct perf_session { struct thread *last_match; struct events_stats events_stats; unsigned long event_total[PERF_RECORD_MAX]; + unsigned long unknown_events; struct rb_root hists; u64 sample_type; int fd; @@ -40,7 +41,6 @@ struct perf_event_ops { event_op process_read_event; event_op process_throttle_event; event_op process_unthrottle_event; - unsigned long total_unknown; bool full_paths; }; From f7d87444e6ee6f4a19634e5412664c1c529a2370 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:04 -0200 Subject: [PATCH 007/640] perf session: Move full_paths config to symbol_conf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now perf_event_ops has just that, event handlers. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-8-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/session.h | 1 - tools/perf/util/symbol.h | 3 ++- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index bd71b8ceafb7..e164b3d45cd4 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -204,7 +204,7 @@ static const struct option options[] = { OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths, + OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths, "Don't shorten the pathnames taking into account the cwd"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 08259184cedb..f695084910c0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -321,7 +321,7 @@ static const struct option options[] = { "pretty printing style key: normal raw"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), - OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths, + OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths, "Don't shorten the pathnames taking into account the cwd"), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4ca427f73994..4f2eeb584da8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -306,7 +306,7 @@ int 
perf_session__process_events(struct perf_session *self, head = self->header.data_offset; - if (!ops->full_paths) { + if (!symbol_conf.full_paths) { char bf[PATH_MAX]; if (getcwd(bf, sizeof(bf)) == NULL) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 585937b6f9ee..2ff77fea06ef 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -41,7 +41,6 @@ struct perf_event_ops { event_op process_read_event; event_op process_throttle_event; event_op process_unthrottle_event; - bool full_paths; }; struct perf_session *perf_session__new(const char *filename, int mode, bool force); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 8aded2356f79..9eabd60f819d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -58,7 +58,8 @@ struct symbol_conf { sort_by_name, show_nr_samples, use_callchain, - exclude_other; + exclude_other, + full_paths; const char *vmlinux_name, *field_sep; char *dso_list_str, From 55aa640f54280da25046acd2075842d464f451e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:05 -0200 Subject: [PATCH 008/640] perf session: Remove redundant prefix & suffix from perf_event_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since now all that we have are perf event handlers, leave just the name of the event. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-9-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 8 ++--- tools/perf/builtin-diff.c | 12 ++++---- tools/perf/builtin-kmem.c | 4 +-- tools/perf/builtin-report.c | 14 ++++----- tools/perf/builtin-sched.c | 6 ++-- tools/perf/builtin-timechart.c | 8 ++--- tools/perf/builtin-trace.c | 4 +-- tools/perf/util/session.c | 54 +++++++++++++++++----------------- tools/perf/util/session.h | 18 ++++++------ 9 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 593ff25006de..117bbae844bf 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -451,10 +451,10 @@ static void perf_session__find_annotations(struct perf_session *self) } static struct perf_event_ops event_ops = { - .process_sample_event = process_sample_event, - .process_mmap_event = event__process_mmap, - .process_comm_event = event__process_comm, - .process_fork_event = event__process_task, + .sample = process_sample_event, + .mmap = event__process_mmap, + .comm = event__process_comm, + .fork = event__process_task, }; static int __cmd_annotate(void) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index e164b3d45cd4..1cbecaf029fa 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -66,12 +66,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi } static struct perf_event_ops event_ops = { - .process_sample_event = diff__process_sample_event, - .process_mmap_event = event__process_mmap, - .process_comm_event = event__process_comm, - .process_exit_event = event__process_task, - .process_fork_event = event__process_task, - .process_lost_event = event__process_lost, + .sample = diff__process_sample_event, + .mmap = event__process_mmap, + .comm = 
event__process_comm, + .exit = event__process_task, + .fork = event__process_task, + .lost = event__process_lost, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 73b065022e27..4c06828fe39d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -343,8 +343,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) } static struct perf_event_ops event_ops = { - .process_sample_event = process_sample_event, - .process_comm_event = event__process_comm, + .sample = process_sample_event, + .comm = event__process_comm, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f695084910c0..508934b0140a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -184,13 +184,13 @@ static int perf_session__setup_sample_type(struct perf_session *self) } static struct perf_event_ops event_ops = { - .process_sample_event = process_sample_event, - .process_mmap_event = event__process_mmap, - .process_comm_event = event__process_comm, - .process_exit_event = event__process_task, - .process_fork_event = event__process_task, - .process_lost_event = event__process_lost, - .process_read_event = process_read_event, + .sample = process_sample_event, + .mmap = event__process_mmap, + .comm = event__process_comm, + .exit = event__process_task, + .fork = event__process_task, + .lost = event__process_lost, + .read = process_read_event, }; static int __cmd_report(void) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e862e71f4e68..702322f8fec1 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1654,9 +1654,9 @@ static int process_lost_event(event_t *event __used, } static struct perf_event_ops event_ops = { - .process_sample_event = process_sample_event, - .process_comm_event = 
event__process_comm, - .process_lost_event = process_lost_event, + .sample = process_sample_event, + .comm = event__process_comm, + .lost = process_lost_event, }; static int read_events(void) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 825283794985..5b68d81d93a1 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1030,10 +1030,10 @@ static void process_samples(struct perf_session *session) } static struct perf_event_ops event_ops = { - .process_comm_event = process_comm_event, - .process_fork_event = process_fork_event, - .process_exit_event = process_exit_event, - .process_sample_event = queue_sample_event, + .comm = process_comm_event, + .fork = process_fork_event, + .exit = process_exit_event, + .sample = queue_sample_event, }; static int __cmd_timechart(void) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e94f34631585..1831434aa938 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -104,8 +104,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) } static struct perf_event_ops event_ops = { - .process_sample_event = process_sample_event, - .process_comm_event = event__process_comm, + .sample = process_sample_event, + .comm = event__process_comm, }; static int __cmd_trace(struct perf_session *session) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4f2eeb584da8..7f0537d1add8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -161,24 +161,24 @@ static int process_event_stub(event_t *event __used, static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) { - if (handler->process_sample_event == NULL) - handler->process_sample_event = process_event_stub; - if (handler->process_mmap_event == NULL) - handler->process_mmap_event = process_event_stub; - if (handler->process_comm_event == NULL) - handler->process_comm_event = process_event_stub; - if 
(handler->process_fork_event == NULL) - handler->process_fork_event = process_event_stub; - if (handler->process_exit_event == NULL) - handler->process_exit_event = process_event_stub; - if (handler->process_lost_event == NULL) - handler->process_lost_event = process_event_stub; - if (handler->process_read_event == NULL) - handler->process_read_event = process_event_stub; - if (handler->process_throttle_event == NULL) - handler->process_throttle_event = process_event_stub; - if (handler->process_unthrottle_event == NULL) - handler->process_unthrottle_event = process_event_stub; + if (handler->sample == NULL) + handler->sample = process_event_stub; + if (handler->mmap == NULL) + handler->mmap = process_event_stub; + if (handler->comm == NULL) + handler->comm = process_event_stub; + if (handler->fork == NULL) + handler->fork = process_event_stub; + if (handler->exit == NULL) + handler->exit = process_event_stub; + if (handler->lost == NULL) + handler->lost = process_event_stub; + if (handler->read == NULL) + handler->read = process_event_stub; + if (handler->throttle == NULL) + handler->throttle = process_event_stub; + if (handler->unthrottle == NULL) + handler->unthrottle = process_event_stub; } static const char *event__name[] = { @@ -222,23 +222,23 @@ static int perf_session__process_event(struct perf_session *self, switch (event->header.type) { case PERF_RECORD_SAMPLE: - return ops->process_sample_event(event, self); + return ops->sample(event, self); case PERF_RECORD_MMAP: - return ops->process_mmap_event(event, self); + return ops->mmap(event, self); case PERF_RECORD_COMM: - return ops->process_comm_event(event, self); + return ops->comm(event, self); case PERF_RECORD_FORK: - return ops->process_fork_event(event, self); + return ops->fork(event, self); case PERF_RECORD_EXIT: - return ops->process_exit_event(event, self); + return ops->exit(event, self); case PERF_RECORD_LOST: - return ops->process_lost_event(event, self); + return ops->lost(event, self); case 
PERF_RECORD_READ: - return ops->process_read_event(event, self); + return ops->read(event, self); case PERF_RECORD_THROTTLE: - return ops->process_throttle_event(event, self); + return ops->throttle(event, self); case PERF_RECORD_UNTHROTTLE: - return ops->process_unthrottle_event(event, self); + return ops->unthrottle(event, self); default: self->unknown_events++; return -1; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 2ff77fea06ef..77c5ee2993c2 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -32,15 +32,15 @@ struct perf_session { typedef int (*event_op)(event_t *self, struct perf_session *session); struct perf_event_ops { - event_op process_sample_event; - event_op process_mmap_event; - event_op process_comm_event; - event_op process_fork_event; - event_op process_exit_event; - event_op process_lost_event; - event_op process_read_event; - event_op process_throttle_event; - event_op process_unthrottle_event; + event_op sample, + mmap, + comm, + fork, + exit, + lost, + read, + throttle, + unthrottle; }; struct perf_session *perf_session__new(const char *filename, int mode, bool force); From 4cf40131a5cf4918e83b3756e58a1fc9e984f8ef Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 27 Dec 2009 21:37:06 -0200 Subject: [PATCH 009/640] perf record: Introduce a symtab cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now a cache will be created in a ~/.debug debuginfo like hierarchy, so that at the end of a 'perf record' session all the binaries (with build-ids) involved get collected and indexed by their build-ids, so that perf report can find them. This is interesting when developing software where you want to do a 'perf diff' with the previous build and opens avenues for lots more interesting tools, like a 'perf diff --graph' that takes more than two binaries into account. 
Tunables for collecting just the symtabs can be added if one doesn't want to have the full binary, but having the full binary allows things like 'perf rerecord' or other tools that can re-run the tests by having access to the exact binary in some perf.data file, so it may well be interesting to keep the full binary there. Space consumption is minimised by trying to use hard links, a 'perf cache' tool to manage the space used, a la ccache is required to purge older entries. With this in place it will be possible also to introduce new commands, 'perf archive' and 'perf restore' (or some more suitable and future proof names) to create a cpio/tar file with the perf data and the files in the cache that _had_ perf hits of interest. There are more aspects to polish, like finding the right vmlinux file to cache, etc, but this is enough for a first step. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1261957026-15580-10-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/util/header.c | 82 +++++++++++++++++++++++++++++++++++++--- tools/perf/util/symbol.c | 17 +++++++-- tools/perf/util/symbol.h | 2 + tools/perf/util/util.c | 69 +++++++++++++++++++++++++++++++++ tools/perf/util/util.h | 3 ++ 6 files changed, 165 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/util.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index fafea0b6f323..7c846424aebf 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -425,6 +425,7 @@ LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/probe-event.o +LIB_OBJS += util/util.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8a0bca55106f..df237c3a041b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -169,20 +169,23 @@ static int do_write(int fd, const void 
*buf, size_t size) return 0; } +#define dsos__for_each_with_build_id(pos, head) \ + list_for_each_entry(pos, head, node) \ + if (!pos->has_build_id) \ + continue; \ + else + static int __dsos__write_buildid_table(struct list_head *head, int fd) { #define NAME_ALIGN 64 struct dso *pos; static const char zero_buf[NAME_ALIGN]; - list_for_each_entry(pos, head, node) { + dsos__for_each_with_build_id(pos, head) { int err; struct build_id_event b; - size_t len; + size_t len = pos->long_name_len + 1; - if (!pos->has_build_id) - continue; - len = pos->long_name_len + 1; len = ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); @@ -209,6 +212,74 @@ static int dsos__write_buildid_table(int fd) return err; } +static int dso__cache_build_id(struct dso *self, const char *debugdir) +{ + const size_t size = PATH_MAX; + char *filename = malloc(size), + *linkname = malloc(size), *targetname, *sbuild_id; + int len, err = -1; + + if (filename == NULL || linkname == NULL) + goto out_free; + + len = snprintf(filename, size, "%s%s", debugdir, self->long_name); + if (mkdir_p(filename, 0755)) + goto out_free; + + len += snprintf(filename + len, sizeof(filename) - len, "/"); + sbuild_id = filename + len; + build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); + + if (access(filename, F_OK) && link(self->long_name, filename) && + copyfile(self->long_name, filename)) + goto out_free; + + len = snprintf(linkname, size, "%s/.build-id/%.2s", + debugdir, sbuild_id); + + if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) + goto out_free; + + snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); + targetname = filename + strlen(debugdir) - 5; + memcpy(targetname, "../..", 5); + + if (symlink(targetname, linkname) == 0) + err = 0; +out_free: + free(filename); + free(linkname); + return err; +} + +static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) +{ + struct dso *pos; + int err = 0; + + 
dsos__for_each_with_build_id(pos, head) + if (dso__cache_build_id(pos, debugdir)) + err = -1; + + return err; +} + +static int dsos__cache_build_ids(void) +{ + int err_kernel, err_user; + char debugdir[PATH_MAX]; + + snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), + DEBUG_CACHE_DIR); + + if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) + return -1; + + err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir); + err_user = __dsos__cache_build_ids(&dsos__user, debugdir); + return err_kernel || err_user ? -1 : 0; +} + static int perf_header__adds_write(struct perf_header *self, int fd) { int nr_sections; @@ -258,6 +329,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) goto out_free; } buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; + dsos__cache_build_ids(); } lseek(fd, sec_start, SEEK_SET); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ab92763edb03..79ca6a099f96 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -22,6 +22,7 @@ enum dso_origin { DSO__ORIG_KERNEL = 0, DSO__ORIG_JAVA_JIT, + DSO__ORIG_BUILD_ID_CACHE, DSO__ORIG_FEDORA, DSO__ORIG_UBUNTU, DSO__ORIG_BUILDID, @@ -1191,6 +1192,7 @@ char dso__symtab_origin(const struct dso *self) static const char origin[] = { [DSO__ORIG_KERNEL] = 'k', [DSO__ORIG_JAVA_JIT] = 'j', + [DSO__ORIG_BUILD_ID_CACHE] = 'B', [DSO__ORIG_FEDORA] = 'f', [DSO__ORIG_UBUNTU] = 'u', [DSO__ORIG_BUILDID] = 'b', @@ -1209,6 +1211,7 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session, int size = PATH_MAX; char *name; u8 build_id[BUILD_ID_SIZE]; + char build_id_hex[BUILD_ID_SIZE * 2 + 1]; int ret = -1; int fd; @@ -1230,8 +1233,16 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session, return ret; } - self->origin = DSO__ORIG_FEDORA - 1; + self->origin = DSO__ORIG_BUILD_ID_CACHE; + if (self->has_build_id) { + build_id__sprintf(self->build_id, sizeof(self->build_id), + build_id_hex); + 
snprintf(name, size, "%s/%s/.build-id/%.2s/%s", + getenv("HOME"), DEBUG_CACHE_DIR, + build_id_hex, build_id_hex + 2); + goto open_file; + } more: do { self->origin++; @@ -1247,8 +1258,6 @@ more: case DSO__ORIG_BUILDID: if (filename__read_build_id(self->long_name, build_id, sizeof(build_id))) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; - build_id__sprintf(build_id, sizeof(build_id), build_id_hex); snprintf(name, size, @@ -1276,7 +1285,7 @@ compare_build_id: if (!dso__build_id_equal(self, build_id)) goto more; } - +open_file: fd = open(name, O_RDONLY); } while (fd < 0); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9eabd60f819d..f27e158943e9 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -8,6 +8,8 @@ #include #include "event.h" +#define DEBUG_CACHE_DIR ".debug" + #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c new file mode 100644 index 000000000000..f3c0798a5e78 --- /dev/null +++ b/tools/perf/util/util.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include +#include "util.h" + +int mkdir_p(char *path, mode_t mode) +{ + struct stat st; + int err; + char *d = path; + + if (*d != '/') + return -1; + + if (stat(path, &st) == 0) + return 0; + + while (*++d == '/'); + + while ((d = strchr(d, '/'))) { + *d = '\0'; + err = stat(path, &st) && mkdir(path, mode); + *d++ = '/'; + if (err) + return -1; + while (*d == '/') + ++d; + } + return (stat(path, &st) && mkdir(path, mode)) ? 
-1 : 0; +} + +int copyfile(const char *from, const char *to) +{ + int fromfd, tofd; + struct stat st; + void *addr; + int err = -1; + + if (stat(from, &st)) + goto out; + + fromfd = open(from, O_RDONLY); + if (fromfd < 0) + goto out; + + tofd = creat(to, 0755); + if (tofd < 0) + goto out_close_from; + + addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0); + if (addr == MAP_FAILED) + goto out_close_to; + + if (write(tofd, addr, st.st_size) == st.st_size) + err = 0; + + munmap(addr, st.st_size); +out_close_to: + close(tofd); + if (err) + unlink(to); +out_close_from: + close(fromfd); +out: + return err; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c673d8825883..0f5b2a6f1080 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -403,4 +403,7 @@ void git_qsort(void *base, size_t nmemb, size_t size, #endif #endif +int mkdir_p(char *path, mode_t mode); +int copyfile(const char *from, const char *to); + #endif From 49f474331e563a6ecf3b1e87ec27ec5482b3e4f1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 27 Dec 2009 11:51:52 +0100 Subject: [PATCH 010/640] perf events: Remove arg from perf sched hooks Since we only ever schedule the local cpu, there is no need to pass the cpu number to the perf sched hooks. This micro-optimizes things a bit. 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 12 ++++++------ kernel/perf_event.c | 27 ++++++++++++++------------- kernel/sched.c | 6 +++--- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75eea..a494e7501292 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -746,10 +746,10 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); -extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_event_task_tick(struct task_struct *task, int cpu); + struct task_struct *next); +extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -870,12 +870,12 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); #else static inline void -perf_event_task_sched_in(struct task_struct *task, int cpu) { } +perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } + struct task_struct *next) { } static inline void -perf_event_task_tick(struct task_struct *task, int cpu) { } +perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { 
} diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 03cc061398d1..099bd662daa6 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1170,9 +1170,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, * not restart the event. */ void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) + struct task_struct *next) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; @@ -1252,8 +1252,9 @@ static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) static void __perf_event_sched_in(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx, int cpu) + struct perf_cpu_context *cpuctx) { + int cpu = smp_processor_id(); struct perf_event *event; int can_add_hw = 1; @@ -1326,24 +1327,24 @@ __perf_event_sched_in(struct perf_event_context *ctx, * accessing the event control register. If a NMI hits, then it will * keep the event running. 
*/ -void perf_event_task_sched_in(struct task_struct *task, int cpu) +void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; if (likely(!ctx)) return; if (cpuctx->task_ctx == ctx) return; - __perf_event_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx); cpuctx->task_ctx = ctx; } -static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx) { struct perf_event_context *ctx = &cpuctx->ctx; - __perf_event_sched_in(ctx, cpuctx, cpu); + __perf_event_sched_in(ctx, cpuctx); } #define MAX_INTERRUPTS (~0ULL) @@ -1461,7 +1462,7 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr, int cpu) +void perf_event_task_tick(struct task_struct *curr) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; @@ -1469,7 +1470,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) if (!atomic_read(&nr_events)) return; - cpuctx = &per_cpu(perf_cpu_context, cpu); + cpuctx = &__get_cpu_var(perf_cpu_context); ctx = curr->perf_event_ctxp; perf_ctx_adjust_freq(&cpuctx->ctx); @@ -1484,9 +1485,9 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) if (ctx) rotate_ctx(ctx); - perf_event_cpu_sched_in(cpuctx, cpu); + perf_event_cpu_sched_in(cpuctx); if (ctx) - perf_event_task_sched_in(curr, cpu); + perf_event_task_sched_in(curr); } /* @@ -1527,7 +1528,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task, smp_processor_id()); + perf_event_task_sched_in(task); out: local_irq_restore(flags); } diff --git a/kernel/sched.c b/kernel/sched.c index 18cceeecce35..d6527ac0f6e7 100644 --- a/kernel/sched.c +++ 
b/kernel/sched.c @@ -2752,7 +2752,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_event_task_sched_in(current, cpu_of(rq)); + perf_event_task_sched_in(current); finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); @@ -5266,7 +5266,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr, cpu); + perf_event_task_tick(curr); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); @@ -5480,7 +5480,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next, cpu); + perf_event_task_sched_out(prev, next); rq->nr_switches++; rq->curr = next; From fd2a50a0240f5f5b59070474eabd83a85720a406 Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Thu, 24 Dec 2009 01:54:47 +0000 Subject: [PATCH 011/640] x86, perfctr: Remove unused func avail_to_resrv_perfctr_nmi() avail_to_resrv_perfctr_nmi() is neither EXPORT'd, nor used in the file. So remove it. 
Signed-off-by: Naga Chumbalkar Acked-by: Cyrill Gorcunov Cc: oprofile-list@lists.sf.net LKML-Reference: <20091224015441.6005.4408.sendpatchset@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 1 - arch/x86/kernel/cpu/perfctr-watchdog.c | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 139d4c1a33a7..93da9c3f3341 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df9719afb..74f4e85a5727 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) return !test_bit(counter, perfctr_nmi_owner); } - -/* checks the an msr for availability */ -int avail_to_resrv_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return !test_bit(counter, perfctr_nmi_owner); -} EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); int reserve_perfctr_nmi(unsigned int msr) From 659d8cfbb225f1fa5a4f8671a847ef3ab5a89660 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 19 Dec 2009 16:40:28 -0500 Subject: [PATCH 012/640] perf tools: Do a few more directory handling optimizations A few more optimizations for perf when dealing with directories. Some of them significantly cut down the work which has to be done. 
d_type should always be set; otherwise fix the kernel code. And there are functions available to parse fstab-like files, so use them. Signed-off-by: Ulrich Drepper Acked-by: Pekka Enberg Cc: a.p.zijlstra@chello.nl Cc: acme@redhat.com Cc: eranian@google.com Cc: fweisbec@gmail.com Cc: lizf@cn.fujitsu.com Cc: paulus@samba.org Cc: xiaoguangrong@cn.fujitsu.com LKML-Reference: <200912192140.nBJLeSfA028905@hs20-bc2-1.build.redhat.com> [ v2: two small stylistic fixlets ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 17 ++++------ tools/perf/util/trace-event-info.c | 50 ++++++++++++++---------------- 2 files changed, 29 insertions(+), 38 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4c06828fe39d..05dc5a735039 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -92,23 +92,18 @@ static void setup_cpunode_map(void) if (!dir1) return; - while (true) { - dent1 = readdir(dir1); - if (!dent1) - break; - - if (sscanf(dent1->d_name, "node%u", &mem) < 1) + while ((dent1 = readdir(dir1)) != NULL) { + if (dent1->d_type != DT_DIR || + sscanf(dent1->d_name, "node%u", &mem) < 1) continue; snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); dir2 = opendir(buf); if (!dir2) continue; - while (true) { - dent2 = readdir(dir2); - if (!dent2) - break; - if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + while ((dent2 = readdir(dir2)) != NULL) { + if (dent2->d_type != DT_LNK || + sscanf(dent2->d_name, "cpu%u", &cpu) < 1) continue; cpunode_map[cpu] = mem; } diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index cace35595530..dfef238ce158 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -20,6 +20,7 @@ */ #define _GNU_SOURCE #include +#include #include #include #include @@ -103,29 +104,29 @@ static const char *find_debugfs(void) { static char debugfs[MAX_PATH+1]; static int debugfs_found; - char type[100]; FILE *fp; + struct mntent *m; if 
(debugfs_found) return debugfs; - if ((fp = fopen("/proc/mounts","r")) == NULL) + fp = setmntent("/proc/mounts", "r"); + if (!fp) die("Can't open /proc/mounts for read"); - while (fscanf(fp, "%*s %" - STR(MAX_PATH) - "s %99s %*s %*d %*d\n", - debugfs, type) == 2) { - if (strcmp(type, "debugfs") == 0) + while ((m = getmntent(fp)) != NULL) { + if (strcmp(m->mnt_type, "debugfs") == 0) { + strcpy(debugfs, m->mnt_dir); + debugfs_found = 1; break; + } } - fclose(fp); - if (strcmp(type, "debugfs") != 0) + endmntent(fp); + + if (!debugfs_found) die("debugfs not mounted, please mount"); - debugfs_found = 1; - return debugfs; } @@ -317,7 +318,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps) die("can't read directory '%s'", sys); while ((dent = readdir(dir))) { - if (strcmp(dent->d_name, ".") == 0 || + if (dent->d_type != DT_DIR || + strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || !name_in_tp_list(dent->d_name, tps)) continue; @@ -334,7 +336,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps) rewinddir(dir); while ((dent = readdir(dir))) { - if (strcmp(dent->d_name, ".") == 0 || + if (dent->d_type != DT_DIR || + strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || !name_in_tp_list(dent->d_name, tps)) continue; @@ -394,26 +397,21 @@ static void read_event_files(struct tracepoint_path *tps) die("can't read directory '%s'", path); while ((dent = readdir(dir))) { - if (strcmp(dent->d_name, ".") == 0 || + if (dent->d_type != DT_DIR || + strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, "ftrace") == 0 || !system_in_tp_list(dent->d_name, tps)) continue; - sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); - sprintf(sys, "%s/%s", path, dent->d_name); - ret = stat(sys, &st); - free(sys); - if (ret < 0) - continue; - if (S_ISDIR(st.st_mode)) - count++; + count++; } write_or_die(&count, 4); rewinddir(dir); while ((dent = readdir(dir))) { - 
if (strcmp(dent->d_name, ".") == 0 || + if (dent->d_type != DT_DIR || + strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || strcmp(dent->d_name, "ftrace") == 0 || !system_in_tp_list(dent->d_name, tps)) @@ -422,10 +420,8 @@ static void read_event_files(struct tracepoint_path *tps) sprintf(sys, "%s/%s", path, dent->d_name); ret = stat(sys, &st); if (ret >= 0) { - if (S_ISDIR(st.st_mode)) { - write_or_die(dent->d_name, strlen(dent->d_name) + 1); - copy_event_system(sys, tps); - } + write_or_die(dent->d_name, strlen(dent->d_name) + 1); + copy_event_system(sys, tps); } free(sys); } From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Dec 2009 14:27:35 +0800 Subject: [PATCH 013/640] perf events: Remove CONFIG_EVENT_PROFILE Quoted from Ingo: | This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE - | it's an unnecessary Kconfig complication. If both PERF_EVENTS and | EVENT_TRACING is enabled we should expose generic tracepoints. | | Nor is it limited to event 'profiling', so it has become a misnomer as | well. 
Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 12 ++++++------ include/trace/syscall.h | 4 ++-- init/Kconfig | 13 ------------- kernel/perf_event.c | 4 ++-- kernel/trace/Makefile | 4 +++- kernel/trace/trace_events_filter.c | 4 ++-- kernel/trace/trace_kprobe.c | 14 +++++++------- kernel/trace/trace_syscalls.c | 5 ++--- 11 files changed, 28 insertions(+), 40 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80df..0a09e758c7d3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -188,7 +188,7 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS struct perf_event; extern int ftrace_profile_enable(int event_id); extern void ftrace_profile_disable(int event_id); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a494e7501292..9a1d276db754 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -658,7 +658,7 @@ struct perf_event { perf_overflow_handler_t overflow_handler; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING struct event_filter *filter; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..b7c7fcf7790b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,7 +99,7 @@ struct perf_event_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) 
__SC_TEST(t6); __SC_TEST5(__VA_ARGS__) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_enable = prof_sysenter_enable, \ @@ -113,7 +113,7 @@ struct perf_event_attr; #define TRACE_SYS_ENTER_PROFILE_INIT(sname) #define TRACE_SYS_EXIT_PROFILE(sname) #define TRACE_SYS_EXIT_PROFILE_INIT(sname) -#endif +#endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..2fdd36df41f6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -498,7 +498,7 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* * Generate the functions needed for tracepoint perf_event support. @@ -541,7 +541,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif +#endif /* CONFIG_PERF_EVENTS */ /* * Stage 4 of the trace events. 
@@ -626,7 +626,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define _TRACE_PROFILE_INIT(call) \ .profile_enable = ftrace_profile_enable_##call, \ @@ -634,7 +634,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #else #define _TRACE_PROFILE_INIT(call) -#endif +#endif /* CONFIG_PERF_EVENTS */ #undef __entry #define __entry entry @@ -834,7 +834,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * } */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #undef __perf_addr #define __perf_addr(a) __addr = (a) @@ -926,7 +926,7 @@ static void ftrace_profile_##call(proto) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ #undef _TRACE_PROFILE_INIT diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556bb..3d463dcef298 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -49,12 +49,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -#ifdef CONFIG_EVENT_PROFILE + +#ifdef CONFIG_PERF_EVENTS int prof_sysenter_enable(struct ftrace_event_call *call); void prof_sysenter_disable(struct ftrace_event_call *call); int prof_sysexit_enable(struct ftrace_event_call *call); void prof_sysexit_disable(struct ftrace_event_call *call); - #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/init/Kconfig b/init/Kconfig index a23da9f01803..06dab27c18d9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -966,19 +966,6 @@ config PERF_EVENTS Say Y if unsure. 
-config EVENT_PROFILE - bool "Tracepoint profiling sources" - depends on PERF_EVENTS && EVENT_TRACING - default y - help - Allow the use of tracepoints as software performance events. - - When this is enabled, you can create perf events based on - tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID - found in debugfs://tracing/events/*/*/id. (The -e/--events - option to the perf tool can parse and interpret symbolic - tracepoints, in the subsystem:tracepoint_name format.) - config PERF_COUNTERS bool "Kernel performance counters (old config option)" depends on HAVE_PERF_EVENTS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 099bd662daa6..5b987b4a98a8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4177,7 +4177,7 @@ static const struct pmu perf_ops_task_clock = { .read = task_clock_perf_event_read, }; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) @@ -4282,7 +4282,7 @@ static void perf_event_free_filter(struct perf_event *event) { } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT static void bp_perf_event_destroy(struct perf_event *event) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cd9ecd89ec77..d00c6fe23f54 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -51,7 +51,9 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o -obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o +endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 50504cb228de..74563d7e102e 100644 --- 
a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1360,7 +1360,7 @@ out_unlock: return err; } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS void ftrace_profile_free_filter(struct perf_event *event) { @@ -1428,5 +1428,5 @@ out_unlock: return err; } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 375f81a568dc..75d75dec226a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1249,7 +1249,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, ", REC->" FIELD_STRING_RETIP); } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ static __kprobes int kprobe_profile_func(struct kprobe *kp, @@ -1407,7 +1407,7 @@ static void probe_profile_disable(struct ftrace_event_call *call) disable_kprobe(&tp->rp.kp); } } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ static __kprobes @@ -1417,10 +1417,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kprobe_trace_func(kp, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kprobe_profile_func(kp, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1431,10 +1431,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kretprobe_trace_func(ri, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kretprobe_profile_func(ri, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1463,7 +1463,7 @@ static int register_probe_event(struct trace_probe *tp) call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS call->profile_enable 
= probe_profile_enable; call->profile_disable = probe_profile_disable; #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..f694f66d75b0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void) } core_initcall(init_ftrace_syscalls); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); @@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -#endif - +#endif /* CONFIG_PERF_EVENTS */ From 29c52aa2300173dd45df04dae1f5acc81a2c93b1 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 28 Dec 2009 16:47:12 +0800 Subject: [PATCH 014/640] perf tools: Mount debugfs automatically Mount debugfs filesystem under '/sys/kernel/debug', if it's not mounted. Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Clark Williams Cc: John Kacur LKML-Reference: <4B387090.7080407@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- tools/perf/perf.c | 2 +- tools/perf/util/debugfs.c | 16 +++++++--------- tools/perf/util/debugfs.h | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 873e55fab375..fc89005c3e51 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -388,7 +388,7 @@ static int run_argv(int *argcp, const char ***argv) /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ static void get_debugfs_mntpt(void) { - const char *path = debugfs_find_mountpoint(); + const char *path = debugfs_mount(NULL); if (path) strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index 06b73ee02c49..1f805fde5fd4 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c @@ -106,16 
+106,14 @@ int debugfs_valid_entry(const char *path) return 0; } -/* mount the debugfs somewhere */ +/* mount the debugfs somewhere if it's not mounted */ -int debugfs_mount(const char *mountpoint) +char *debugfs_mount(const char *mountpoint) { - char mountcmd[128]; - /* see if it's already mounted */ if (debugfs_find_mountpoint()) { debugfs_premounted = 1; - return 0; + return debugfs_mountpoint; } /* if not mounted and no argument */ @@ -127,13 +125,13 @@ int debugfs_mount(const char *mountpoint) mountpoint = "/sys/kernel/debug"; } + if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) + return NULL; + /* save the mountpoint */ strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); - /* mount it */ - snprintf(mountcmd, sizeof(mountcmd), - "/bin/mount -t debugfs debugfs %s", mountpoint); - return system(mountcmd); + return debugfs_mountpoint; } /* umount the debugfs */ diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h index 3cd14f9ae784..83a02879745f 100644 --- a/tools/perf/util/debugfs.h +++ b/tools/perf/util/debugfs.h @@ -15,7 +15,7 @@ extern const char *debugfs_find_mountpoint(void); extern int debugfs_valid_mountpoint(const char *debugfs); extern int debugfs_valid_entry(const char *path); -extern int debugfs_mount(const char *mountpoint); +extern char *debugfs_mount(const char *mountpoint); extern int debugfs_umount(void); extern int debugfs_write(const char *entry, const char *value); extern int debugfs_read(const char *entry, char *buffer, size_t size); From 61be3e59ba7a6dbd39f92fd1f107285a0caeb008 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 28 Dec 2009 16:48:30 +0800 Subject: [PATCH 015/640] perf trace: Clean up find_debugfs() Remove redundant code for 'perf trace' Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Clark Williams Cc: John Kacur LKML-Reference: <4B3870DE.7090500@cn.fujitsu.com> [ v2: resolved conflicts with recent changes ] Signed-off-by: Ingo Molnar --- 
tools/perf/util/debugfs.c | 1 + tools/perf/util/trace-event-info.c | 29 +++++------------------------ 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index 1f805fde5fd4..a88fefc0cc0a 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c @@ -130,6 +130,7 @@ char *debugfs_mount(const char *mountpoint) /* save the mountpoint */ strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); + debugfs_found = 1; return debugfs_mountpoint; } diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index dfef238ce158..535176dc95b6 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -38,6 +38,7 @@ #include "../perf.h" #include "trace-event.h" +#include "debugfs.h" #define VERSION "0.5" @@ -102,32 +103,12 @@ void *malloc_or_die(unsigned int size) static const char *find_debugfs(void) { - static char debugfs[MAX_PATH+1]; - static int debugfs_found; - FILE *fp; - struct mntent *m; + const char *path = debugfs_mount(NULL); - if (debugfs_found) - return debugfs; + if (!path) + die("Your kernel not support debugfs filesystem"); - fp = setmntent("/proc/mounts", "r"); - if (!fp) - die("Can't open /proc/mounts for read"); - - while ((m = getmntent(fp)) != NULL) { - if (strcmp(m->mnt_type, "debugfs") == 0) { - strcpy(debugfs, m->mnt_dir); - debugfs_found = 1; - break; - } - } - - endmntent(fp); - - if (!debugfs_found) - die("debugfs not mounted, please mount"); - - return debugfs; + return path; } /* From 9967411e5b324a908e344d6ce66b77bd5d372c3e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 28 Dec 2009 16:49:38 +0800 Subject: [PATCH 016/640] perf trace: Fix forgotten close of file/dir Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Clark Williams Cc: John Kacur LKML-Reference: <4B387122.7090801@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- 
tools/perf/util/trace-event-info.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 535176dc95b6..407fd65b6cdb 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -253,6 +253,8 @@ static void read_header_files(void) write_or_die("header_page", 12); write_or_die(&size, 8); check_size = copy_file_fd(fd); + close(fd); + if (size != check_size) die("wrong size for '%s' size=%lld read=%lld", path, size, check_size); @@ -271,6 +273,7 @@ static void read_header_files(void) if (size != check_size) die("wrong size for '%s'", path); put_tracing_file(path); + close(fd); } static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) @@ -337,6 +340,7 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps) free(format); } + closedir(dir); } static void read_ftrace_files(struct tracepoint_path *tps) @@ -407,6 +411,7 @@ static void read_event_files(struct tracepoint_path *tps) free(sys); } + closedir(dir); put_tracing_file(path); } From 754a00aeb26bcc8bf82897538a078bc84a6d95c7 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Fri, 25 Dec 2009 20:08:45 +0100 Subject: [PATCH 017/640] arch/avr32: Fix build failure for avr32 caused by typo This patch fixes a build failure introduced by the patch atmel-mci: change use of dma slave interface by Nicolas Ferre by changing mci_dma_slave to the correct name of mci_dma_data This should make the avr32 tree build again. References: http://kisskb.ellerman.id.au/kisskb/buildresult/1893610/ http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=commitdiff;h=2635d1ba711560d521f6218c585a3e0401f566e1 Patch against Linus' tree. 
Signed-off-by: Peter Huewe Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/at32ap700x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 1aa1ea5e9212..7d4ed4291985 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1325,7 +1325,7 @@ struct platform_device *__init at32_add_device_mci(unsigned int id, struct mci_platform_data *data) { struct platform_device *pdev; - struct mci_dma_slave *slave; + struct mci_dma_data *slave; u32 pioa_mask; u32 piob_mask; @@ -1344,7 +1344,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) ARRAY_SIZE(atmel_mci0_resource))) goto fail; - slave = kzalloc(sizeof(struct mci_dma_slave), GFP_KERNEL); + slave = kzalloc(sizeof(struct mci_dma_data), GFP_KERNEL); slave->sdata.dma_dev = &dw_dmac0_device.dev; slave->sdata.reg_width = DW_DMA_SLAVE_WIDTH_32BIT; From cbf8de1620cdb1abb5b0618ff561004f816064fc Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Mon, 28 Dec 2009 12:22:06 +0100 Subject: [PATCH 018/640] avr32: clean up memory allocation in at32_add_device_mci This patch will check if the kzalloc for the MCI DMA struct actually returns a valid address, and also clean up properly if it fails or the function fails at a later stage. This also silences a compiler warning about using the slave variable uninitialized. 
Signed-off-by: Hans-Christian Egtvedt Signed-off-by: Haavard Skinnemoen --- arch/avr32/mach-at32ap/at32ap700x.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 7d4ed4291985..b13d1879e51b 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1345,6 +1345,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) goto fail; slave = kzalloc(sizeof(struct mci_dma_data), GFP_KERNEL); + if (!slave) + goto fail; slave->sdata.dma_dev = &dw_dmac0_device.dev; slave->sdata.reg_width = DW_DMA_SLAVE_WIDTH_32BIT; @@ -1357,7 +1359,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) if (platform_device_add_data(pdev, data, sizeof(struct mci_platform_data))) - goto fail; + goto fail_free; /* CLK line is common to both slots */ pioa_mask = 1 << 10; @@ -1381,7 +1383,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) /* Slot is unused */ break; default: - goto fail; + goto fail_free; } select_peripheral(PIOA, pioa_mask, PERIPH_A, 0); @@ -1408,7 +1410,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) break; default: if (!data->slot[0].bus_width) - goto fail; + goto fail_free; data->slot[1].bus_width = 0; break; @@ -1419,9 +1421,10 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) platform_device_add(pdev); return pdev; +fail_free: + kfree(slave); fail: data->dma_slave = NULL; - kfree(slave); platform_device_put(pdev); return NULL; } From 41bdcb23dab22bf27361c5f2d89fe895d8904915 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Tue, 29 Dec 2009 16:37:07 +0800 Subject: [PATCH 019/640] perf tools: Unify event type description make event type description to a unified array and the array index consistent to perf_type_id. 
Signed-off-by: Liming Wang Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1262075829-16257-1-git-send-email-liming.wang@windriver.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e5bc0fb016b2..dc585a835cab 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -835,11 +835,12 @@ int parse_filter(const struct option *opt __used, const char *str, } static const char * const event_type_descriptors[] = { - "", "Hardware event", "Software event", "Tracepoint event", "Hardware cache event", + "Raw hardware event descriptor", + "Hardware breakpoint", }; /* @@ -872,7 +873,7 @@ static void print_tracepoint_events(void) snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); printf(" %-42s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); + event_type_descriptors[PERF_TYPE_TRACEPOINT]); } closedir(evt_dir); } @@ -892,9 +893,7 @@ void print_events(void) printf("List of pre-defined events (to be used in -e):\n"); for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - type = syms->type + 1; - if (type >= ARRAY_SIZE(event_type_descriptors)) - type = 0; + type = syms->type; if (type != prev_type) printf("\n"); @@ -919,17 +918,19 @@ void print_events(void) for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { printf(" %-42s [%s]\n", event_cache_name(type, op, i), - event_type_descriptors[4]); + event_type_descriptors[PERF_TYPE_HW_CACHE]); } } } printf("\n"); - printf(" %-42s [raw hardware event descriptor]\n", - "rNNN"); + printf(" %-42s [%s]\n", + "rNNN", event_type_descriptors[PERF_TYPE_RAW]); printf("\n"); - printf(" %-42s [hardware breakpoint]\n", "mem:[:access]"); + printf(" %-42s [%s]\n", + "mem:[:access]", + event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); 
print_tracepoint_events(); From 63bbd5e2d539c9290b229c832f62d42aac23db94 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Tue, 29 Dec 2009 16:37:09 +0800 Subject: [PATCH 020/640] perf probe: Change CONFIG_KPROBE_TRACER to CONFIG_KPROBE_EVENT make the config name consistent Signed-off-by: Liming Wang Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1262075829-16257-3-git-send-email-liming.wang@windriver.com> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 29465d440043..8e532d9824f0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -368,7 +368,7 @@ static int open_kprobe_events(int flags, int mode) if (ret < 0) { if (errno == ENOENT) die("kprobe_events file does not exist -" - " please rebuild with CONFIG_KPROBE_TRACER."); + " please rebuild with CONFIG_KPROBE_EVENT."); else die("Could not open kprobe_events file: %s", strerror(errno)); From 769885f372300a7fcfb9e54e4e2990718d40b529 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Dec 2009 22:48:32 -0200 Subject: [PATCH 021/640] perf header: Do_read shouldn't die MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate the errors instead, its callers already propagate other errors. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262047716-23171-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index df237c3a041b..6b3cb94e8a2b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -432,19 +432,19 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) return 0; } -static void do_read(int fd, void *buf, size_t size) +static int do_read(int fd, void *buf, size_t size) { while (size) { int ret = read(fd, buf, size); - if (ret < 0) - die("failed to read"); - if (ret == 0) - die("failed to read: missing data"); + if (ret <= 0) + return -1; size -= ret; buf += ret; } + + return 0; } int perf_header__process_sections(struct perf_header *self, int fd, @@ -455,7 +455,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, int nr_sections; int sec_size; int idx = 0; - int err = 0, feat = 1; + int err = -1, feat = 1; nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); if (!nr_sections) @@ -469,8 +469,10 @@ int perf_header__process_sections(struct perf_header *self, int fd, lseek(fd, self->data_offset + self->data_size, SEEK_SET); - do_read(fd, feat_sec, sec_size); + if (do_read(fd, feat_sec, sec_size)) + goto out_free; + err = 0; while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { if (perf_header__has_feat(self, feat)) { struct perf_file_section *sec = &feat_sec[idx++]; @@ -481,18 +483,18 @@ int perf_header__process_sections(struct perf_header *self, int fd, } ++feat; } - +out_free: free(feat_sec); return err; -}; +} int perf_file_header__read(struct perf_file_header *self, struct perf_header *ph, int fd) { lseek(fd, 0, SEEK_SET); - do_read(fd, self, sizeof(*self)); - if (self->magic != PERF_MAGIC 
|| + if (do_read(fd, self, sizeof(*self)) || + self->magic != PERF_MAGIC || self->attr_size != sizeof(struct perf_file_attr)) return -1; @@ -558,7 +560,8 @@ int perf_header__read(struct perf_header *self, int fd) struct perf_header_attr *attr; off_t tmp; - do_read(fd, &f_attr, sizeof(f_attr)); + if (do_read(fd, &f_attr, sizeof(f_attr))) + goto out_errno; tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); @@ -569,7 +572,8 @@ int perf_header__read(struct perf_header *self, int fd) lseek(fd, f_attr.ids.offset, SEEK_SET); for (j = 0; j < nr_ids; j++) { - do_read(fd, &f_id, sizeof(f_id)); + if (do_read(fd, &f_id, sizeof(f_id))) + goto out_errno; if (perf_header_attr__add_id(attr, f_id) < 0) { perf_header_attr__delete(attr); @@ -589,7 +593,8 @@ int perf_header__read(struct perf_header *self, int fd) events = malloc(f_header.event_types.size); if (events == NULL) return -ENOMEM; - do_read(fd, events, f_header.event_types.size); + if (do_read(fd, events, f_header.event_types.size)) + goto out_errno; event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } @@ -599,6 +604,8 @@ int perf_header__read(struct perf_header *self, int fd) self->frozen = 1; return 0; +out_errno: + return -errno; } u64 perf_header__sample_type(struct perf_header *header) From ae99fb2c335ef018520950ddc9692faacab39cf2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Dec 2009 22:48:33 -0200 Subject: [PATCH 022/640] perf header: perf_header__push_event() shouldn't die MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just propagate eventual errors. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262047716-23171-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 16 ++++++++++------ tools/perf/util/header.h | 2 +- tools/perf/util/parse-events.c | 18 +++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 6b3cb94e8a2b..709e3252f049 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -105,24 +105,28 @@ struct perf_trace_event_type { static int event_count; static struct perf_trace_event_type *events; -void perf_header__push_event(u64 id, const char *name) +int perf_header__push_event(u64 id, const char *name) { if (strlen(name) > MAX_EVENT_NAME) pr_warning("Event %s will be truncated\n", name); if (!events) { events = malloc(sizeof(struct perf_trace_event_type)); - if (!events) - die("nomem"); + if (events == NULL) + return -ENOMEM; } else { - events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type)); - if (!events) - die("nomem"); + struct perf_trace_event_type *nevents; + + nevents = realloc(events, (event_count + 1) * sizeof(*events)); + if (nevents == NULL) + return -ENOMEM; + events = nevents; } memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); events[event_count].event_id = id; strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1); event_count++; + return 0; } char *perf_header__find_event(u64 id) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d118d05d3abe..2b69aab67e35 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -64,7 +64,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit); int perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr); -void perf_header__push_event(u64 id, const char *name); +int perf_header__push_event(u64 
id, const char *name); char *perf_header__find_event(u64 id); struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index dc585a835cab..609d5a9470c5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -753,11 +753,11 @@ modifier: return ret; } -static void store_event_type(const char *orgname) +static int store_event_type(const char *orgname) { char filename[PATH_MAX], *c; FILE *file; - int id; + int id, n; sprintf(filename, "%s/", debugfs_path); strncat(filename, orgname, strlen(orgname)); @@ -769,11 +769,14 @@ static void store_event_type(const char *orgname) file = fopen(filename, "r"); if (!file) - return; - if (fscanf(file, "%i", &id) < 1) - die("cannot store event ID"); + return 0; + n = fscanf(file, "%i", &id); fclose(file); - perf_header__push_event(id, orgname); + if (n < 1) { + pr_err("cannot store event ID\n"); + return -EINVAL; + } + return perf_header__push_event(id, orgname); } int parse_events(const struct option *opt __used, const char *str, int unset __used) @@ -782,7 +785,8 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u enum event_result ret; if (strchr(str, ':')) - store_event_type(str); + if (store_event_type(str) < 0) + return -1; for (;;) { if (nr_counters == MAX_COUNTERS) From 71289be7630fb97f2de6bb2e18a50289dc869f9d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Dec 2009 22:48:34 -0200 Subject: [PATCH 023/640] perf report: Add --hide-unresolved/-U command line option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful to match the 'overhead' column in 'perf report' with the 'baseline' one in 'perf diff'. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262047716-23171-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 508934b0140a..4292d7afcd60 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -34,6 +34,7 @@ static char const *input_name = "perf.data"; static int force; +static bool hide_unresolved; static int show_threads; static struct perf_read_values show_threads_values; @@ -121,7 +122,7 @@ static int process_sample_event(event_t *event, struct perf_session *session) return -1; } - if (al.filtered) + if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) { @@ -342,6 +343,8 @@ static const struct option options[] = { OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), + OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, + "Only display entries resolved to a symbol"), OPT_END() }; From cdbae31408cf39372402076cf2e189ec693daa71 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Dec 2009 22:48:35 -0200 Subject: [PATCH 024/640] perf diff: Don't add the period for unresolved symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we don't add histograms buckets for them, this way the sum of baselines should be 100%. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262047716-23171-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1cbecaf029fa..876a4b981be8 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -51,12 +51,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi return -1; } - if (al.filtered) + if (al.filtered || al.sym == NULL) return 0; event__parse_sample(event, session->sample_type, &data); - if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) { + if (perf_session__add_hist_entry(session, &al, data.period)) { pr_warning("problem incrementing symbol count, skipping event\n"); return -1; } From 9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Dec 2009 22:48:36 -0200 Subject: [PATCH 025/640] perf diff: Fix support for all --sort combinations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we finish creating the hist_entries we _already_ have them sorted "by name", in fact by what is in --sort, that is exactly how we can find the pairs in perf_session__match_hists as 'comm', 'dso' & 'symbol' all are strings we need to find the matches in the baseline session. So only do the sort by hits followed by a resort by --sort if we need to find the position for showing the --displacement of hist entries. Now all these modes work correctly: Example is a simple 'perf record -f find / > /dev/null' ran twice then followed by the following commands: $ perf diff -f --sort comm # Baseline Delta Command # ........ .......... ....... # 0.00% +100.00% find $ perf diff -f --sort dso # Baseline Delta Shared Object # ........ ..........
.................. # 59.97% -0.44% [kernel] 21.17% +0.28% libc-2.5.so 18.49% +0.16% [ext3] 0.37% find $ perf diff -f --sort symbol | head -8 # Baseline Delta Symbol # ........ .......... ...... # 6.21% +0.36% [k] ext3fs_dirhash 3.43% +0.41% [.] __GI_strlen 3.53% +0.16% [k] __kmalloc 3.17% +0.49% [k] system_call 3.06% +0.37% [k] ext3_htree_store_dirent $ perf diff -f --sort dso,symbol | head -8 # Baseline Delta Shared Object Symbol # ........ .......... .................. ...... # 6.21% +0.36% [ext3] [k] ext3fs_dirhash 3.43% +0.41% libc-2.5.so [.] __GI_strlen 3.53% +0.16% [kernel] [k] __kmalloc 3.17% +0.49% [kernel] [k] system_call 3.06% +0.37% [ext3] [k] ext3_htree_store_dirent $ And we don't have to do two expensive resorts in the common, non --displacement case. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262047716-23171-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-diff.c | 52 ++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 876a4b981be8..924bfb77a6ab 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -82,29 +82,19 @@ static void perf_session__insert_hist_entry_by_name(struct rb_root *root, struct hist_entry *iter; while (*p != NULL) { - int cmp; parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = strcmp(he->map->dso->name, iter->map->dso->name); - if (cmp > 0) + if (hist_entry__cmp(he, iter) < 0) p = &(*p)->rb_left; - else if (cmp < 0) + else p = &(*p)->rb_right; - else { - cmp = strcmp(he->sym->name, iter->sym->name); - if (cmp > 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, root); } -static void perf_session__resort_by_name(struct perf_session *self) +static void 
perf_session__resort_hist_entries(struct perf_session *self) { unsigned long position = 1; struct rb_root tmp = RB_ROOT; @@ -122,29 +112,28 @@ static void perf_session__resort_by_name(struct perf_session *self) self->hists = tmp; } +static void perf_session__set_hist_entries_positions(struct perf_session *self) +{ + perf_session__output_resort(self, self->events_stats.total); + perf_session__resort_hist_entries(self); +} + static struct hist_entry * -perf_session__find_hist_entry_by_name(struct perf_session *self, - struct hist_entry *he) +perf_session__find_hist_entry(struct perf_session *self, + struct hist_entry *he) { struct rb_node *n = self->hists.rb_node; while (n) { struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); - int cmp = strcmp(he->map->dso->name, iter->map->dso->name); + int64_t cmp = hist_entry__cmp(he, iter); - if (cmp > 0) + if (cmp < 0) n = n->rb_left; - else if (cmp < 0) + else if (cmp > 0) n = n->rb_right; - else { - cmp = strcmp(he->sym->name, iter->sym->name); - if (cmp > 0) - n = n->rb_left; - else if (cmp < 0) - n = n->rb_right; - else - return iter; - } + else + return iter; } return NULL; @@ -155,11 +144,9 @@ static void perf_session__match_hists(struct perf_session *old_session, { struct rb_node *nd; - perf_session__resort_by_name(old_session); - for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) { struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); - pos->pair = perf_session__find_hist_entry_by_name(old_session, pos); + pos->pair = perf_session__find_hist_entry(old_session, pos); } } @@ -177,9 +164,12 @@ static int __cmd_diff(void) ret = perf_session__process_events(session[i], &event_ops); if (ret) goto out_delete; - perf_session__output_resort(session[i], session[i]->events_stats.total); } + perf_session__output_resort(session[1], session[1]->events_stats.total); + if (show_displacement) + perf_session__set_hist_entries_positions(session[0]); + perf_session__match_hists(session[0], 
session[1]); perf_session__fprintf_hists(session[1], session[0], show_displacement, stdout); From 6a5fa2362b628ee950080bef8895a6fb62f58ab4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 1 Jan 2010 01:28:43 +0000 Subject: [PATCH 026/640] [CIFS] Add support for TCP_NODELAY mount option sockopt=TCP_NODELAY helpful for faster networks boosting performance. Kernel bugzilla bug number 14032. Signed-off-by: Steve French --- fs/cifs/CHANGES | 4 ++++ fs/cifs/cifsfs.h | 2 +- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 30 ++++++++++++++++++++++++++---- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 7b2600b380d7..49503d2edc7e 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,7 @@ +Version 1.62 +------------ +Add sockopt=TCP_NODELAY mount option. + Version 1.61 ------------ Fix append problem to Samba servers (files opened with O_APPEND could diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index ac2b24c192f8..78c1b86d55f6 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.61" +#define CIFS_VERSION "1.62" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4b35f7ec0583..ed751bb657db 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -149,6 +149,7 @@ struct TCP_Server_Info { bool svlocal:1; /* local server or remote */ bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ + bool tcp_nodelay; atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 atomic_t inSend; /* requests trying to send */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3bbcaa716b3c..2e9e09ca0e30 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -98,7 +98,7 @@ struct smb_vol { bool 
nostrictsync:1; /* do not force expensive SMBflush on every sync */ unsigned int rsize; unsigned int wsize; - unsigned int sockopt; + bool sockopt_tcp_nodelay:1; unsigned short int port; char *prepath; }; @@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname, simple_strtoul(value, &value, 0); } } else if (strnicmp(data, "sockopt", 5) == 0) { - if (value && *value) { - vol->sockopt = - simple_strtoul(value, &value, 0); + if (!value || !*value) { + cERROR(1, ("no socket option specified")); + continue; + } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) { + vol->sockopt_tcp_nodelay = 1; } } else if (strnicmp(data, "netbiosname", 4) == 0) { if (!value || !*value || (*value == ' ')) { @@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->noblocksnd = volume_info->noblocksnd; tcp_ses->noautotune = volume_info->noautotune; + tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; atomic_set(&tcp_ses->inFlight, 0); init_waitqueue_head(&tcp_ses->response_q); init_waitqueue_head(&tcp_ses->request_q); @@ -1764,6 +1767,7 @@ static int ipv4_connect(struct TCP_Server_Info *server) { int rc = 0; + int val; bool connected = false; __be16 orig_port = 0; struct socket *socket = server->ssocket; @@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server) socket->sk->sk_rcvbuf = 140 * 1024; } + if (server->tcp_nodelay) { + val = 1; + rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, + (char *)&val, sizeof(val)); + if (rc) + cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); + } + cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo)); @@ -1916,6 +1928,7 @@ static int ipv6_connect(struct TCP_Server_Info *server) { int rc = 0; + int val; bool connected = false; __be16 orig_port = 0; struct socket *socket = server->ssocket; @@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server) */ socket->sk->sk_rcvtimeo = 7 * HZ; 
socket->sk->sk_sndtimeo = 5 * HZ; + + if (server->tcp_nodelay) { + val = 1; + rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, + (char *)&val, sizeof(val)); + if (rc) + cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); + } + server->ssocket = socket; return rc; From 9f0727f5f41d21eb3da67e50965d1f30e054795f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Jan 2010 18:24:50 +0000 Subject: [PATCH 027/640] mx31ads: Allow enable/disable of switchable supplies They will be automatically powered off at startup so users will need to enable them for use. Signed-off-by: Mark Brown Signed-off-by: Sascha Hauer --- arch/arm/mach-mx3/mx31ads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c index 3e7bafa2ddbb..609d402cadba 100644 --- a/arch/arm/mach-mx3/mx31ads.c +++ b/arch/arm/mach-mx3/mx31ads.c @@ -302,6 +302,7 @@ static struct regulator_init_data ldo1_data = { .min_uV = 2800000, .max_uV = 2800000, .valid_modes_mask = REGULATOR_MODE_NORMAL, + .valid_ops_mask = REGULATOR_CHANGE_STATUS, .apply_uV = 1, }, }; @@ -322,6 +323,7 @@ static struct regulator_init_data ldo2_data = { .min_uV = 3300000, .max_uV = 3300000, .valid_modes_mask = REGULATOR_MODE_NORMAL, + .valid_ops_mask = REGULATOR_CHANGE_STATUS, .apply_uV = 1, }, .num_consumer_supplies = ARRAY_SIZE(ldo2_consumers), From bd02acdbb28be99ed87ec11c3ef61a5eec4e2dd7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 16:05:15 +0000 Subject: [PATCH 028/640] mx31ads: Provide a name for EXPIO interrupt chip This makes it a bit more obvious in genirq diagnostics that they aren't handled by the i.MX interrupt controller. 
Signed-off-by: Mark Brown Signed-off-by: Sascha Hauer --- arch/arm/mach-mx3/mx31ads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c index 609d402cadba..e2054563027f 100644 --- a/arch/arm/mach-mx3/mx31ads.c +++ b/arch/arm/mach-mx3/mx31ads.c @@ -173,6 +173,7 @@ static void expio_unmask_irq(u32 irq) } static struct irq_chip expio_irq_chip = { + .name = "EXPIO(CPLD)", .ack = expio_ack_irq, .mask = expio_mask_irq, .unmask = expio_unmask_irq, From 3d661ac187e72af71d3bb7d48a46012180a6fc46 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 16:05:16 +0000 Subject: [PATCH 029/640] mx31ads: Provide an IRQ range to the WM835x on the 1133-EV1 module The WM8350 core won't actually use the range yet, but it will in future and the platform data to configure it is there now. Signed-off-by: Mark Brown Signed-off-by: Sascha Hauer --- arch/arm/mach-mx3/mx31ads.c | 1 + arch/arm/plat-mxc/include/mach/irqs.h | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c index e2054563027f..938c549767dc 100644 --- a/arch/arm/mach-mx3/mx31ads.c +++ b/arch/arm/mach-mx3/mx31ads.c @@ -462,6 +462,7 @@ static int mx31_wm8350_init(struct wm8350 *wm8350) static struct wm8350_platform_data __initdata mx31_wm8350_pdata = { .init = mx31_wm8350_init, + .irq_base = MXC_BOARD_IRQ_START + MXC_MAX_EXP_IO_LINES, }; #endif diff --git a/arch/arm/plat-mxc/include/mach/irqs.h b/arch/arm/plat-mxc/include/mach/irqs.h index ead9d592168d..0cb347645db4 100644 --- a/arch/arm/plat-mxc/include/mach/irqs.h +++ b/arch/arm/plat-mxc/include/mach/irqs.h @@ -37,7 +37,12 @@ * within sensible limits. 
*/ #define MXC_BOARD_IRQ_START (MXC_INTERNAL_IRQS + MXC_GPIO_IRQS) + +#ifdef CONFIG_MACH_MX31ADS_WM1133_EV1 +#define MXC_BOARD_IRQS 80 +#else #define MXC_BOARD_IRQS 16 +#endif #define MXC_IPU_IRQ_START (MXC_BOARD_IRQ_START + MXC_BOARD_IRQS) From 1dd473fdf1d8a7531e0955480cd129f9c1e8b8a3 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 12 Jan 2010 03:37:45 +0900 Subject: [PATCH 030/640] ocfs2: Fix refcnt leak on ocfs2_fast_follow_link() error path If ->follow_link handler returns an error, it should decrement nd->path refcnt. But ocfs2_fast_follow_link() doesn't decrement. This patch fixes the problem by using nd_set_link() style error handling instead of playing with nd->path. Signed-off-by: OGAWA Hirofumi Signed-off-by: Joel Becker --- fs/ocfs2/symlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 49b133ccbf11..32499d213fc4 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, } memcpy(link, target, len); - nd_set_link(nd, link); bail: + nd_set_link(nd, status ? ERR_PTR(status) : link); brelse(bh); mlog_exit(status); - return status ? ERR_PTR(status) : link; + return NULL; } static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - char *link = cookie; - - kfree(link); + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); } const struct inode_operations ocfs2_symlink_inode_operations = { From 7b4e08a77f0cbfe31b47faf082caa02f9c252266 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 11 Jan 2010 16:33:18 +0000 Subject: [PATCH 031/640] MXC: Add AUDMUXv2 register decode to debugfs Since AUDMUX configuration appears to be one of the common stumbling blocks for people setting up i.MX audio try to provide some diagnostic information describing the current setup to assist people in working out what's going on.
Signed-off-by: Mark Brown Signed-off-by: Sascha Hauer --- arch/arm/plat-mxc/audmux-v2.c | 137 ++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/arch/arm/plat-mxc/audmux-v2.c b/arch/arm/plat-mxc/audmux-v2.c index 6f21096086fd..b06954a84436 100644 --- a/arch/arm/plat-mxc/audmux-v2.c +++ b/arch/arm/plat-mxc/audmux-v2.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,140 @@ static void __iomem *audmux_base; #define MXC_AUDMUX_V2_PTCR(x) ((x) * 8) #define MXC_AUDMUX_V2_PDCR(x) ((x) * 8 + 4) +#ifdef CONFIG_DEBUG_FS +static struct dentry *audmux_debugfs_root; + +static int audmux_open_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +/* There is an annoying discontinuity in the SSI numbering with regard + * to the Linux number of the devices */ +static const char *audmux_port_string(int port) +{ + switch (port) { + case MX31_AUDMUX_PORT1_SSI0: + return "imx-ssi.0"; + case MX31_AUDMUX_PORT2_SSI1: + return "imx-ssi.1"; + case MX31_AUDMUX_PORT3_SSI_PINS_3: + return "SSI3"; + case MX31_AUDMUX_PORT4_SSI_PINS_4: + return "SSI4"; + case MX31_AUDMUX_PORT5_SSI_PINS_5: + return "SSI5"; + case MX31_AUDMUX_PORT6_SSI_PINS_6: + return "SSI6"; + default: + return "UNKNOWN"; + } +} + +static ssize_t audmux_read_file(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + ssize_t ret; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + int port = (int)file->private_data; + u32 pdcr, ptcr; + + if (!buf) + return -ENOMEM; + + if (audmux_clk) + clk_enable(audmux_clk); + + ptcr = readl(audmux_base + MXC_AUDMUX_V2_PTCR(port)); + pdcr = readl(audmux_base + MXC_AUDMUX_V2_PDCR(port)); + + if (audmux_clk) + clk_disable(audmux_clk); + + ret = snprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", + pdcr, ptcr); + + if (ptcr & MXC_AUDMUX_V2_PTCR_TFSDIR) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "TxFS output from %s, ", + audmux_port_string((ptcr >> 27) & 
0x7)); + else + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "TxFS input, "); + + if (ptcr & MXC_AUDMUX_V2_PTCR_TCLKDIR) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "TxClk output from %s", + audmux_port_string((ptcr >> 22) & 0x7)); + else + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "TxClk input"); + + ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + + if (ptcr & MXC_AUDMUX_V2_PTCR_SYN) { + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "Port is symmetric"); + } else { + if (ptcr & MXC_AUDMUX_V2_PTCR_RFSDIR) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "RxFS output from %s, ", + audmux_port_string((ptcr >> 17) & 0x7)); + else + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "RxFS input, "); + + if (ptcr & MXC_AUDMUX_V2_PTCR_RCLKDIR) + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "RxClk output from %s", + audmux_port_string((ptcr >> 12) & 0x7)); + else + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "RxClk input"); + } + + ret += snprintf(buf + ret, PAGE_SIZE - ret, + "\nData received from %s\n", + audmux_port_string((pdcr >> 13) & 0x7)); + + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); + + kfree(buf); + + return ret; +} + +static const struct file_operations audmux_debugfs_fops = { + .open = audmux_open_file, + .read = audmux_read_file, +}; + +static void audmux_debugfs_init(void) +{ + int i; + char buf[20]; + + audmux_debugfs_root = debugfs_create_dir("audmux", NULL); + if (!audmux_debugfs_root) { + pr_warning("Failed to create AUDMUX debugfs root\n"); + return; + } + + for (i = 1; i < 8; i++) { + snprintf(buf, sizeof(buf), "ssi%d", i); + if (!debugfs_create_file(buf, 0444, audmux_debugfs_root, + (void *)i, &audmux_debugfs_fops)) + pr_warning("Failed to create AUDMUX port %d debugfs file\n", + i); + } +} +#else +static inline void audmux_debugfs_init(void) +{ +} +#endif + int mxc_audmux_v2_configure_port(unsigned int port, unsigned int ptcr, unsigned int pdcr) { @@ -68,6 +203,8 @@ static int mxc_audmux_v2_init(void) if 
(cpu_is_mx31() || cpu_is_mx35()) audmux_base = IO_ADDRESS(AUDMUX_BASE_ADDR); + audmux_debugfs_init(); + return 0; } From 0fb8ee48d9dfff6a0913ceb0be2068d8be203763 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 05:53:03 +0100 Subject: [PATCH 032/640] perf: Drop useless check for ignored frame The check that ignores the debug and nmi stack frames is useless now that we have a frame pointer that makes us start at the right place. We don't anymore have to deal with these. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262235183-5320-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 2 -- arch/x86/kernel/cpu/perf_event.c | 8 -------- arch/x86/kernel/dumpstack_32.c | 5 ----- arch/x86/kernel/dumpstack_64.c | 5 ----- 4 files changed, 20 deletions(-) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 35e89122a42f..4dab78edbad9 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -3,8 +3,6 @@ extern int kstack_depth_to_print; -int x86_is_stack_id(int id, char *name); - struct thread_info; struct stacktrace_ops; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d616c06e99b4..b1bb8c550526 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2297,7 +2297,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2313,10 +2312,6 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_ignored_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name) || - x86_is_stack_id(DEBUG_STACK, name); - return 0; } @@ 
-2324,9 +2319,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_ignored_frame, smp_processor_id())) - return; - if (reliable) callchain_store(entry, addr); } diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index ae775ca47b25..11540a189d93 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -18,11 +18,6 @@ #include "dumpstack.h" -/* Just a stub for now */ -int x86_is_stack_id(int id, char *name) -{ - return 0; -} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 0ad9597073f5..676bc051252e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = { #endif }; -int x86_is_stack_id(int id, char *name) -{ - return x86_stack_ids[id - 1] == name; -} - static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) { From 60666c630bdb33983a894b050b588b663f38f368 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Thu, 31 Dec 2009 16:05:50 +0800 Subject: [PATCH 033/640] perf tools: Fix --pid option for stat current pid option doesn't work for perf stat. Change it to what perf record --pid acts as. 
Signed-off-by: Liming Wang Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1262246750-2191-1-git-send-email-liming.wang@windriver.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 106 ++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c70d72003557..e8c85d5aec41 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -44,6 +44,7 @@ #include "util/parse-events.h" #include "util/event.h" #include "util/debug.h" +#include "util/header.h" #include #include @@ -79,6 +80,8 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static int event_scaled[MAX_COUNTERS]; +static volatile int done = 0; + struct stats { double n, mean, M2; @@ -247,61 +250,64 @@ static int run_perf_stat(int argc __used, const char **argv) unsigned long long t0, t1; int status = 0; int counter; - int pid; + int pid = target_pid; int child_ready_pipe[2], go_pipe[2]; + const bool forks = (target_pid == -1 && argc > 0); char buf; if (!system_wide) nr_cpus = 1; - if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { perror("failed to create pipes"); exit(1); } - if ((pid = fork()) < 0) - perror("failed to fork"); + if (forks) { + if ((pid = fork()) < 0) + perror("failed to fork"); - if (!pid) { - close(child_ready_pipe[0]); - close(go_pipe[1]); - fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + if (!pid) { + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. 
+ */ + if (read(go_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); + } + + child_pid = pid; /* - * Do a dummy execvp to get the PLT entry resolved, - * so we avoid the resolver overhead on the real - * execvp call. - */ - execvp("", (char **)argv); - - /* - * Tell the parent we're ready to go + * Wait for the child to be ready to exec. */ close(child_ready_pipe[1]); - - /* - * Wait until the parent tells us to go. - */ - if (read(go_pipe[0], &buf, 1) == -1) + close(go_pipe[0]); + if (read(child_ready_pipe[0], &buf, 1) == -1) perror("unable to read pipe"); - - execvp(argv[0], (char **)argv); - - perror(argv[0]); - exit(-1); + close(child_ready_pipe[0]); } - child_pid = pid; - - /* - * Wait for the child to be ready to exec. - */ - close(child_ready_pipe[1]); - close(go_pipe[0]); - if (read(child_ready_pipe[0], &buf, 1) == -1) - perror("unable to read pipe"); - close(child_ready_pipe[0]); - for (counter = 0; counter < nr_counters; counter++) create_perf_stat_counter(counter, pid); @@ -310,8 +316,12 @@ static int run_perf_stat(int argc __used, const char **argv) */ t0 = rdclock(); - close(go_pipe[1]); - wait(&status); + if (forks) { + close(go_pipe[1]); + wait(&status); + } else { + while(!done); + } t1 = rdclock(); @@ -417,10 +427,13 @@ static void print_stat(int argc, const char **argv) fflush(stdout); fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for \'%s", argv[0]); - - for (i = 1; i < argc; i++) - fprintf(stderr, " %s", argv[i]); + fprintf(stderr, " Performance counter stats for "); + if(target_pid == -1) { + fprintf(stderr, "\'%s", argv[0]); + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + }else + fprintf(stderr, "task pid \'%d", target_pid); fprintf(stderr, "\'"); if (run_count > 1) @@ -445,6 +458,9 @@ static volatile int signr = -1; static void skip_signal(int signo) { + if(target_pid != -1) + done = 1; + signr = signo; } @@ -461,7 +477,7 @@ 
static void sig_atexit(void) } static const char * const stat_usage[] = { - "perf stat [] ", + "perf stat [] []", NULL }; @@ -492,7 +508,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (!argc && target_pid == -1) usage_with_options(stat_usage, options); if (run_count <= 0) usage_with_options(stat_usage, options); From 682b335a5bccf9e5b7e74380784aa2f145d04444 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Jan 2010 16:19:26 -0200 Subject: [PATCH 034/640] perf symbols: Generalise the kallsyms parsing routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used to find a specific symbol by name on 'perf record' to support relocation reference symbols to support relocatable kernels. Still have to convert the perf trace tools to use it instead of their current reimplementation. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262629169-22797-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 74 ++++++++++++++++++++++++++-------------- tools/perf/util/symbol.h | 2 ++ 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 79ca6a099f96..b9e0da57d84b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -383,16 +383,12 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) return ret; } -/* - * Loads the function entries in /proc/kallsyms into kernel_map->dso, - * so that we can in the next step set the symbol ->end address and then - * call kernel_maps__split_kallsyms. 
- */ -static int dso__load_all_kallsyms(struct dso *self, struct map *map) +int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name, + char type, u64 start)) { char *line = NULL; size_t n; - struct rb_root *root = &self->symbols[map->type]; + int err = 0; FILE *file = fopen("/proc/kallsyms", "r"); if (file == NULL) @@ -400,7 +396,6 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map) while (!feof(file)) { u64 start; - struct symbol *sym; int line_len, len; char symbol_type; char *symbol_name; @@ -421,35 +416,62 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map) continue; symbol_type = toupper(line[len]); - if (!symbol_type__is_a(symbol_type, map->type)) - continue; - symbol_name = line + len + 2; - /* - * Will fix up the end later, when we have all symbols sorted. - */ - sym = symbol__new(start, 0, symbol_name); - if (sym == NULL) - goto out_delete_line; - /* - * We will pass the symbols to the filter later, in - * map__split_kallsyms, when we have split the maps per module - */ - symbols__insert(root, sym); + err = process_symbol(arg, symbol_name, symbol_type, start); + if (err) + break; } free(line); fclose(file); + return err; - return 0; - -out_delete_line: - free(line); out_failure: return -1; } +struct process_kallsyms_args { + struct map *map; + struct dso *dso; +}; + +static int map__process_kallsym_symbol(void *arg, const char *name, + char type, u64 start) +{ + struct symbol *sym; + struct process_kallsyms_args *a = arg; + struct rb_root *root = &a->dso->symbols[a->map->type]; + + if (!symbol_type__is_a(type, a->map->type)) + return 0; + + /* + * Will fix up the end later, when we have all symbols sorted. 
+ */ + sym = symbol__new(start, 0, name); + + if (sym == NULL) + return -ENOMEM; + /* + * We will pass the symbols to the filter later, in + * map__split_kallsyms, when we have split the maps per module + */ + symbols__insert(root, sym); + return 0; +} + +/* + * Loads the function entries in /proc/kallsyms into kernel_map->dso, + * so that we can in the next step set the symbol ->end address and then + * call kernel_maps__split_kallsyms. + */ +static int dso__load_all_kallsyms(struct dso *self, struct map *map) +{ + struct process_kallsyms_args args = { .map = map, .dso = self, }; + return kallsyms__parse(&args, map__process_kallsym_symbol); +} + /* * Split the symbols into maps, making sure there are no overlaps, i.e. the * kernel range is broken in several maps, named [kernel].N, as we don't have diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f27e158943e9..21313e87c37b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -135,6 +135,8 @@ int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); +int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name, + char type, u64 start)); int symbol__init(void); int perf_session__create_kernel_maps(struct perf_session *self); From 36a3e6461a0dac8e84b8c94877365324010c151b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Jan 2010 16:19:27 -0200 Subject: [PATCH 035/640] perf symbols: Export symbol_type__is_a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be needed by the new HEADER_DSO_INFO feature that will be a HEADER_BUILD_ID superset, replacing it. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262629169-22797-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- tools/perf/util/symbol.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b9e0da57d84b..5dffcd132d15 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -64,7 +64,7 @@ static void dso__set_sorted_by_name(struct dso *self, enum map_type type) self->sorted_by_name |= (1 << type); } -static bool symbol_type__is_a(char symbol_type, enum map_type map_type) +bool symbol_type__is_a(char symbol_type, enum map_type map_type) { switch (map_type) { case MAP__FUNCTION: diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 21313e87c37b..b2b5330a82a0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -139,6 +139,8 @@ int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name char type, u64 start)); int symbol__init(void); +bool symbol_type__is_a(char symbol_type, enum map_type map_type); + int perf_session__create_kernel_maps(struct perf_session *self); extern struct list_head dsos__user, dsos__kernel; From f92cb24c78a7c853435e46a20d1bd5c894378132 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Jan 2010 16:19:28 -0200 Subject: [PATCH 036/640] perf tools: Create write_padded routine out of __dsos__write_buildid_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by other options where padding is needed. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262629169-22797-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 709e3252f049..942f7da8bf84 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -173,6 +173,20 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } +#define NAME_ALIGN 64 + +static int write_padded(int fd, const void *bf, size_t count, + size_t count_aligned) +{ + static const char zero_buf[NAME_ALIGN]; + int err = do_write(fd, bf, count); + + if (!err) + err = do_write(fd, zero_buf, count_aligned - count); + + return err; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -181,9 +195,7 @@ static int do_write(int fd, const void *buf, size_t size) static int __dsos__write_buildid_table(struct list_head *head, int fd) { -#define NAME_ALIGN 64 struct dso *pos; - static const char zero_buf[NAME_ALIGN]; dsos__for_each_with_build_id(pos, head) { int err; @@ -197,10 +209,8 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd) err = do_write(fd, &b, sizeof(b)); if (err < 0) return err; - err = do_write(fd, pos->long_name, pos->long_name_len + 1); - if (err < 0) - return err; - err = do_write(fd, zero_buf, len - pos->long_name_len - 1); + err = write_padded(fd, pos->long_name, + pos->long_name_len + 1, len); if (err < 0) return err; } From de1764892a61a3ed212973cc028c80dd083179dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Jan 2010 16:19:29 -0200 Subject: [PATCH 037/640] perf session: Keep pointers to the vmlinux maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that tools such as 
'perf probe' don't have to lookup '[kernel.kallsyms]' but instead access them directly after perf_session__create_kernel_maps or map_groups__create_kernel_maps. Signed-off-by: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262629169-22797-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-probe.c | 4 +--- tools/perf/util/session.h | 1 + tools/perf/util/symbol.c | 29 +++++++++++++---------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c1e6774fd3ed..ffdd3fe87b4a 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -235,9 +235,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) session.psession = perf_session__new(NULL, O_WRONLY, false); if (session.psession == NULL) die("Failed to init perf_session."); - session.kmap = map_groups__find_by_name(&session.psession->kmaps, - MAP__FUNCTION, - "[kernel.kallsyms]"); + session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION]; if (!session.kmap) die("Could not find kernel map.\n"); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 77c5ee2993c2..8db37bbf0e62 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -18,6 +18,7 @@ struct perf_session { struct map_groups kmaps; struct rb_root threads; struct thread *last_match; + struct map *vmlinux_maps[MAP__NR_TYPES]; struct events_stats events_stats; unsigned long event_total[PERF_RECORD_MAX]; unsigned long unknown_events; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5dffcd132d15..e290429e9c00 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1662,7 +1662,7 @@ size_t dsos__fprintf_buildid(FILE *fp) __dsos__fprintf_buildid(&dsos__user, fp)); } -static struct dso *dsos__create_kernel( const char *vmlinux) +static struct dso 
*dsos__create_kernel(const char *vmlinux) { struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]"); @@ -1691,29 +1691,26 @@ out_delete_kernel_dso: return NULL; } -static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux) +static int map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES], + const char *vmlinux) { - struct map *functions, *variables; struct dso *kernel = dsos__create_kernel(vmlinux); + enum map_type type; if (kernel == NULL) return -1; - functions = map__new2(0, kernel, MAP__FUNCTION); - if (functions == NULL) - return -1; + for (type = 0; type < MAP__NR_TYPES; ++type) { + vmlinux_maps[type] = map__new2(0, kernel, type); + if (vmlinux_maps[type] == NULL) + return -1; - variables = map__new2(0, kernel, MAP__VARIABLE); - if (variables == NULL) { - map__delete(functions); - return -1; + vmlinux_maps[type]->map_ip = + vmlinux_maps[type]->unmap_ip = identity__map_ip; + map_groups__insert(self, vmlinux_maps[type]); } - functions->map_ip = functions->unmap_ip = - variables->map_ip = variables->unmap_ip = identity__map_ip; - map_groups__insert(self, functions); - map_groups__insert(self, variables); - return 0; } @@ -1824,7 +1821,7 @@ out_free_comm_list: int perf_session__create_kernel_maps(struct perf_session *self) { - if (map_groups__create_kernel_maps(&self->kmaps, + if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps, symbol_conf.vmlinux_name) < 0) return -1; From b9a63b9b56d6910a25e3d4905525aef150420a9b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Jan 2010 11:54:45 -0200 Subject: [PATCH 038/640] perf report: Fix --no-call-chain option handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid the funny: [root@doppio ~]# perf record -a -f sleep 2s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.334 MB perf.data (~14572 samples) ] [root@doppio ~]# perf 
report --no-call-graph selected -g but no callchain data. Did you call perf record without -g? And fix the bug reported by peterz when we do indeed record with callchains and then ask for a report without: [root@doppio ~]# perf record -a -g -f sleep 2s [root@doppio ~]# perf report --no-call-graph Segmentation fault [root@doppio ~]# Reported-by: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262699685-27820-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4292d7afcd60..80d691a4191f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -35,6 +35,7 @@ static char const *input_name = "perf.data"; static int force; static bool hide_unresolved; +static bool dont_use_callchains; static int show_threads; static struct perf_read_values show_threads_values; @@ -172,7 +173,8 @@ static int perf_session__setup_sample_type(struct perf_session *self) " -g?\n"); return -1; } - } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { + } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (register_callchain_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain" @@ -246,11 +248,19 @@ out_delete: static int parse_callchain_opt(const struct option *opt __used, const char *arg, - int unset __used) + int unset) { char *tok; char *endptr; + /* + * --no-call-graph + */ + if (unset) { + dont_use_callchains = true; + return 0; + } + symbol_conf.use_callchain = true; if (!arg) From 56b03f3c4d641dbdbce2e52a2969712e85b0e030 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Jan 2010 16:50:31 -0200 Subject: [PATCH 039/640] perf 
tools: Handle relocatable kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSOs don't have this problem because the kernel emits a PERF_MMAP for each new executable mapping it performs on monitored threads. To fix the kernel case we simulate the same behaviour, by having 'perf record' synthesize a PERF_MMAP for the kernel, encoded like this: [root@doppio ~]# perf record -a -f sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.344 MB perf.data (~15038 samples) ] [root@doppio ~]# perf report -D | head -10 0xd0 [0x40]: event: 1 . . ... raw event: size 64 bytes . 0000: 01 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 ......@........ . 0010: 00 00 00 81 ff ff ff ff 00 00 00 00 00 00 00 00 ............... . 0020: 00 00 00 00 00 00 00 00 5b 6b 65 72 6e 65 6c 2e ........ [kernel . 0030: 6b 61 6c 6c 73 79 6d 73 2e 5f 74 65 78 74 5d 00 kallsyms._text] . 0xd0 [0x40]: PERF_RECORD_MMAP 0/0: [0xffffffff81000000((nil)) @ (nil)]: [kernel.kallsyms._text] I.e. we identify such event as having: .pid = 0 .filename = [kernel.kallsyms.REFNAME] .start = REFNAME addr in /proc/kallsyms at 'perf record' time and for now use a hardcoded value of '_text' for REFNAME. Then, later, in 'perf report', if there are any kernel hits and thus we need to resolve kernel symbols, we search for REFNAME and if its address changed, relocation happened and we thus must change the kernel mapping routines to one that uses .pgoff as the relocation to apply. This way we use the same mechanism used for the other DSOs and don't have to make two passes over all the kernel symbols. Reported-by: Xiao Guangrong Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: "H. 
Peter Anvin" Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Xiao Guangrong LKML-Reference: <1262717431-1246-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 7 ++++ tools/perf/util/event.c | 64 +++++++++++++++++++++++++++++++++++-- tools/perf/util/event.h | 4 +++ tools/perf/util/session.c | 46 ++++++++++++++++++++++++++ tools/perf/util/session.h | 10 ++++++ tools/perf/util/symbol.c | 7 ++-- 6 files changed, 133 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 265425322734..8f88420e066b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -551,6 +551,13 @@ static int __cmd_record(int argc, const char **argv) return err; } + err = event__synthesize_kernel_mmap(process_synthesized_event, + session, "_text"); + if (err < 0) { + pr_err("Couldn't record kernel reference relocation symbol.\n"); + return err; + } + if (!system_wide && profile_cpu == -1) event__synthesize_thread(pid, process_synthesized_event, session); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bb0fd6da2d56..1a31feb9999f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -189,6 +189,50 @@ void event__synthesize_threads(int (*process)(event_t *event, closedir(proc); } +struct process_symbol_args { + const char *name; + u64 start; +}; + +static int find_symbol_cb(void *arg, const char *name, char type, u64 start) +{ + struct process_symbol_args *args = arg; + + if (!symbol_type__is_a(type, MAP__FUNCTION) || strcmp(name, args->name)) + return 0; + + args->start = start; + return 1; +} + +int event__synthesize_kernel_mmap(int (*process)(event_t *event, + struct perf_session *session), + struct perf_session *session, + const char *symbol_name) +{ + size_t size; + event_t ev = { + .header = { .type = PERF_RECORD_MMAP }, + }; + /* + * We should get this from /sys/kernel/sections/.text, but till that is + * available use this, 
and after it is use this as a fallback for older + * kernels. + */ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(&args, find_symbol_cb) <= 0) + return -ENOENT; + + size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), + "[kernel.kallsyms.%s]", symbol_name) + 1; + size = ALIGN(size, sizeof(u64)); + ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size)); + ev.mmap.start = args.start; + + return process(&ev, session); +} + static void thread__comm_adjust(struct thread *self) { char *comm = self->comm; @@ -240,9 +284,9 @@ int event__process_lost(event_t *self, struct perf_session *session) int event__process_mmap(event_t *self, struct perf_session *session) { - struct thread *thread = perf_session__findnew(session, self->mmap.pid); - struct map *map = map__new(&self->mmap, MAP__FUNCTION, - session->cwd, session->cwdlen); + struct thread *thread; + struct map *map; + static const char kmmap_prefix[] = "[kernel.kallsyms."; dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", self->mmap.pid, self->mmap.tid, @@ -251,6 +295,20 @@ int event__process_mmap(event_t *self, struct perf_session *session) (void *)(long)self->mmap.pgoff, self->mmap.filename); + if (self->mmap.pid == 0 && + memcmp(self->mmap.filename, kmmap_prefix, + sizeof(kmmap_prefix) - 1) == 0) { + const char *symbol_name = (self->mmap.filename + + sizeof(kmmap_prefix) - 1); + perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name, + self->mmap.start); + return 0; + } + + thread = perf_session__findnew(session, self->mmap.pid); + map = map__new(&self->mmap, MAP__FUNCTION, + session->cwd, session->cwdlen); + if (thread == NULL || map == NULL) dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); else diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 80fb3653c809..61fc0dc658c2 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -110,6 +110,10 @@ int event__synthesize_thread(pid_t pid, void 
event__synthesize_threads(int (*process)(event_t *event, struct perf_session *session), struct perf_session *session); +int event__synthesize_kernel_mmap(int (*process)(event_t *event, + struct perf_session *session), + struct perf_session *session, + const char *symbol_name); int event__process_comm(event_t *self, struct perf_session *session); int event__process_lost(event_t *self, struct perf_session *session); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7f0537d1add8..e0e6a075489e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -401,3 +401,49 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg) return true; } + +int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, + const char *symbol_name, + u64 addr) +{ + char *bracket; + + self->ref_reloc_sym.name = strdup(symbol_name); + if (self->ref_reloc_sym.name == NULL) + return -ENOMEM; + + bracket = strchr(self->ref_reloc_sym.name, ']'); + if (bracket) + *bracket = '\0'; + + self->ref_reloc_sym.addr = addr; + return 0; +} + +static u64 map__reloc_map_ip(struct map *map, u64 ip) +{ + return ip + (s64)map->pgoff; +} + +static u64 map__reloc_unmap_ip(struct map *map, u64 ip) +{ + return ip - (s64)map->pgoff; +} + +void perf_session__reloc_vmlinux_maps(struct perf_session *self, + u64 unrelocated_addr) +{ + enum map_type type; + s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr; + + if (!reloc) + return; + + for (type = 0; type < MAP__NR_TYPES; ++type) { + struct map *map = self->vmlinux_maps[type]; + + map->map_ip = map__reloc_map_ip; + map->unmap_ip = map__reloc_unmap_ip; + map->pgoff = reloc; + } +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 8db37bbf0e62..d4a9d20f8d44 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -24,6 +24,10 @@ struct perf_session { unsigned long unknown_events; struct rb_root hists; u64 sample_type; + struct { + const char *name; + u64 addr; 
+ } ref_reloc_sym; int fd; int cwdlen; char *cwd; @@ -59,4 +63,10 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg); int perf_header__read_build_ids(int input, u64 offset, u64 file_size); +int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, + const char *symbol_name, + u64 addr); +void perf_session__reloc_vmlinux_maps(struct perf_session *self, + u64 unrelocated_addr); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e290429e9c00..da2f07f1af8f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -956,11 +956,15 @@ static int dso__load_sym(struct dso *self, struct map *map, elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; - const char *elf_name; + const char *elf_name = elf_sym__name(&sym, symstrs); char *demangled = NULL; int is_label = elf_sym__is_label(&sym); const char *section_name; + if (kernel && session->ref_reloc_sym.name != NULL && + strcmp(elf_name, session->ref_reloc_sym.name) == 0) + perf_session__reloc_vmlinux_maps(session, sym.st_value); + if (!is_label && !elf_sym__is_a(&sym, map->type)) continue; @@ -973,7 +977,6 @@ static int dso__load_sym(struct dso *self, struct map *map, if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) continue; - elf_name = elf_sym__name(&sym, symstrs); section_name = elf_sec__name(&shdr, secstrs); if (kernel || kmodule) { From ec3a9039601af210fca4650d229621fe5a21df0b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:46:41 -0500 Subject: [PATCH 040/640] tracing/kprobe: Update example output in documentation Update example output in documentation according to current implementation. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20100105224641.19431.34967.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 47aabeebbdf6..c3eff6ff945f 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -97,23 +97,24 @@ recording return value as "myretprobe" event. cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format name: myprobe -ID: 75 +ID: 780 format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8; size:4; + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1;signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_lock_depth; offset:8; size:4; signed:1; - field: unsigned long ip; offset:16;tsize:8; - field: int nargs; offset:24;tsize:4; - field: unsigned long dfd; offset:32;tsize:8; - field: unsigned long filename; offset:40;tsize:8; - field: unsigned long flags; offset:48;tsize:8; - field: unsigned long mode; offset:56;tsize:8; + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:int __probe_nargs; offset:16; size:4; signed:1; + field:unsigned long dfd; offset:20; size:4; signed:0; + field:unsigned long filename; offset:24; size:4; signed:0; + field:unsigned long flags; offset:28; size:4; signed:0; + field:unsigned long mode; offset:32; size:4; signed:0; -print fmt: "(%lx) dfd=%lx filename=%lx 
flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip, +REC->dfd, REC->filename, REC->flags, REC->mode You can see that the event has 4 arguments as in the expressions you specified. From 14640106f243a3b29944d7198569090fa6546f2d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:46:48 -0500 Subject: [PATCH 041/640] tracing/kprobe: Drop function argument access syntax Drop function argument access syntax, because the function arguments depend on not only architecture but also compile-options and function API. And now, we have perf-probe for finding register/memory assigned to each argument. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Roland McGrath Cc: Oleg Nesterov Cc: Mahesh Salgaonkar Cc: Benjamin Herrenschmidt Cc: Michael Neuling Cc: linuxppc-dev@ozlabs.org LKML-Reference: <20100105224648.19431.52309.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 21 ++++++++++----------- kernel/trace/trace_kprobe.c | 18 +----------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c3eff6ff945f..f30978e001f8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -37,15 +37,12 @@ Synopsis of kprobe_events @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $argN : Fetch function argument. 
(N >= 0)(*) - $retval : Fetch return value.(**) - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. - (*) aN may not correct on asmlinkaged functions and at the middle of - function body. - (**) only for return probe. - (***) this is useful for fetching a field of data structures. + (*) only for return probe. + (**) this is useful for fetching a field of data structures. Per-Probe Event Filtering @@ -82,11 +79,14 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording -1st to 4th arguments as "myprobe" event. As this example shows, users can -choose more familiar names for each arguments. +1st to 4th arguments as "myprobe" event. Note, which register/stack entry is +assigned to each function argument depends on arch-specific ABI. If you unsure +the ABI, please try to use probe subcommand of perf-tools (you can find it +under tools/perf/). +As this example shows, users can choose more familiar names for each arguments. echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events @@ -147,4 +147,3 @@ events, you need to enable it. returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b). 
- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 47f54ab57b68..7ac728ded964 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) return retval; } -static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) -{ - return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); -} - static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, void *dummy) { @@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) { int ret = -EINVAL; - if (ff->func == fetch_argument) - ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); - else if (ff->func == fetch_register) { + if (ff->func == fetch_register) { const char *name; name = regs_query_register_name((unsigned int)((long)ff->data)); ret = snprintf(buf, n, "%%%s", name); @@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) } } else ret = -EINVAL; - } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { - ret = strict_strtoul(arg + 3, 10, ¶m); - if (ret || param > PARAM_MAX_ARGS) - ret = -EINVAL; - else { - ff->func = fetch_argument; - ff->data = (void *)param; - } } else ret = -EINVAL; return ret; @@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv) * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: - * $argN : fetch Nth of function argument. (N:0-) * $retval : fetch return value * $stack : fetch stack address * $stackN : fetch Nth of stack (N:0-) From aa5add93e92019018e905146f8c3d3f8e3c08300 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:46:56 -0500 Subject: [PATCH 042/640] x86/ptrace: Remove unused regs_get_argument_nth API Because of dropping function argument syntax from kprobe-tracer, we don't need this API anymore. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Roland McGrath Cc: Oleg Nesterov Cc: Mahesh Salgaonkar Cc: Benjamin Herrenschmidt Cc: Michael Neuling Cc: Steven Rostedt Cc: linuxppc-dev@ozlabs.org LKML-Reference: <20100105224656.19431.92588.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 4 ---- arch/x86/kernel/ptrace.c | 24 ------------------------ 2 files changed, 28 deletions(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 9d369f680321..20102808b191 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } -/* Get Nth argument at function call */ -extern unsigned long regs_get_argument_nth(struct pt_regs *regs, - unsigned int n); - /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 017d937639fe..73554a3aae8c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -140,30 +140,6 @@ static const int arg_offs_table[] = { #endif }; -/** - * regs_get_argument_nth() - get Nth argument at function call - * @regs: pt_regs which contains registers at function entry. - * @n: argument number. - * - * regs_get_argument_nth() returns @n th argument of a function call. - * Since usually the kernel stack will be changed right after function entry, - * you must use this at function entry. If the @n th entry is NOT in the - * kernel stack or pt_regs, this returns 0. - */ -unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) -{ - if (n < ARRAY_SIZE(arg_offs_table)) - return *(unsigned long *)((char *)regs + arg_offs_table[n]); - else { - /* - * The typical case: arg n is on the stack. 
- * (Note: stack[0] = return address, so skip it) - */ - n -= ARRAY_SIZE(arg_offs_table); - return regs_get_kernel_stack_nth(regs, 1 + n); - } -} - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. From bbaa46fac6d1c652bfa6282420d36a44bdc53b64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:47:03 -0500 Subject: [PATCH 043/640] perf probe: Remove newline from die() Remove newline from die(), because it is automatically added. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20100105224703.19431.42475.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 4b852c0d16a5..6402798337c8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -402,11 +402,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) } else if (op == DW_OP_regx) { regn = loc->lr_number; } else - die("Dwarf_OP %d is not supported.\n", op); + die("Dwarf_OP %d is not supported.", op); regs = get_arch_regstr(regn); if (!regs) - die("%lld exceeds max register number.\n", regn); + die("%lld exceeds max register number.", regn); if (deref) ret = snprintf(pf->buf, pf->len, @@ -438,7 +438,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) return ; error: die("Failed to find the location of %s at this address.\n" - " Perhaps, it has been optimized out.\n", pf->var); + " Perhaps, it has been optimized out.", pf->var); } static int variable_callback(struct die_link *dlink, void *data) @@ -476,7 +476,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) /* Search child die for local variables 
and parameters. */ ret = search_die_from_children(sp_die, variable_callback, pf); if (!ret) - die("Failed to find '%s' in this function.\n", pf->var); + die("Failed to find '%s' in this function.", pf->var); } /* Get a frame base on the address */ @@ -602,7 +602,7 @@ static void find_by_line(struct probe_finder *pf) ret = search_die_from_children(pf->cu_die, probeaddr_callback, pf); if (ret == 0) - die("Probe point is not found in subprograms.\n"); + die("Probe point is not found in subprograms."); /* Continuing, because target line might be inlined. */ } dwarf_srclines_dealloc(__dw_debug, lines, cnt); @@ -661,7 +661,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) !die_inlined_subprogram(lk->die)) goto found; } - die("Failed to find real subprogram.\n"); + die("Failed to find real subprogram."); found: /* Get offset from subprogram */ ret = die_within_subprogram(lk->die, pf->addr, &offs); From 72041334b8c75ae7e1da2f17ba2b7afee8f2abd7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:47:10 -0500 Subject: [PATCH 044/640] perf probe: Show probe list in pager Show probe list in pager, because the list can be longer than a page. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20100105224710.19431.61542.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8e532d9824f0..a22141a773bc 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -37,6 +37,7 @@ #include "string.h" #include "strlist.h" #include "debug.h" +#include "cache.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -455,6 +456,8 @@ void show_perf_probe_events(void) struct strlist *rawlist; struct str_node *ent; + setup_pager(); + fd = open_kprobe_events(O_RDONLY, 0); rawlist = get_trace_kprobe_event_rawlist(fd); close(fd); From fb1d2edf7ee25a26ad0b238d0ee335a3b28b7aa3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:47:17 -0500 Subject: [PATCH 045/640] perf tools: Support tracepoint glob matching Support glob wildcard when selecting tracepoint events by -e option. Without this patch, perf-tools supports 'GROUP:*:record' syntax for selecting all tracepoints under GROUP group. With this patch, user can choose tracepoints more flexibly by using partial wildcards, e.g. 'block:*bio*:record'. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20100105224717.19431.68972.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 609d5a9470c5..05d0c5c2030c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -450,7 +450,8 @@ parse_single_tracepoint_event(char *sys_name, /* sys + ':' + event + ':' + flags*/ #define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) static enum event_result -parse_subsystem_tracepoint_event(char *sys_name, char *flags) +parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, + char *flags) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; @@ -474,6 +475,9 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags) || !strcmp(evt_ent->d_name, "filter")) continue; + if (!strglobmatch(evt_ent->d_name, evt_exp)) + continue; + len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name, evt_ent->d_name, flags ? 
":" : "", flags ?: ""); @@ -522,9 +526,10 @@ static enum event_result parse_tracepoint_event(const char **strp, if (evt_length >= MAX_EVENT_LENGTH) return EVT_FAILED; - if (!strcmp(evt_name, "*")) { + if (strpbrk(evt_name, "*?")) { *strp = evt_name + evt_length; - return parse_subsystem_tracepoint_event(sys_name, flags); + return parse_multiple_tracepoint_event(sys_name, evt_name, + flags); } else return parse_single_tracepoint_event(sys_name, evt_name, evt_length, flags, From 6964cd2c8efe6e048401f1fe3952a06c563c34c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:47:24 -0500 Subject: [PATCH 046/640] perf tools: Enhance glob string matching Enhance strglobmatch() for supporting character classes([CHARS], complementation and ranges are also supported) and escaped special characters (\*, \? etc). Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20100105224724.19431.56271.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/string.c | 65 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 5352d7dccc61..c397d4f6f748 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -227,16 +227,73 @@ fail: return NULL; } -/* Glob expression pattern matching */ +/* Character class matching */ +static bool __match_charclass(const char *pat, char c, const char **npat) +{ + bool complement = false, ret = true; + + if (*pat == '!') { + complement = true; + pat++; + } + if (*pat++ == c) /* First character is special */ + goto end; + + while (*pat && *pat != ']') { /* Matching */ + if (*pat == '-' && *(pat + 1) != ']') { /* Range */ + if (*(pat - 1) <= c && c <= *(pat + 1)) + goto end; + if (*(pat - 1) > *(pat + 1)) + goto 
error; + pat += 2; + } else if (*pat++ == c) + goto end; + } + if (!*pat) + goto error; + ret = false; + +end: + while (*pat && *pat != ']') /* Searching closing */ + pat++; + if (!*pat) + goto error; + *npat = pat + 1; + return complement ? !ret : ret; + +error: + return false; +} + +/** + * strglobmatch - glob expression pattern matching + * @str: the target string to match + * @pat: the pattern string to match + * + * This returns true if the @str matches @pat. @pat can include wildcards + * ('*','?') and character classes ([CHARS], complementation and ranges are + * also supported). Also, this supports escape character ('\') to use special + * characters as normal character. + * + * Note: if @pat syntax is broken, this always returns false. + */ bool strglobmatch(const char *str, const char *pat) { while (*str && *pat && *pat != '*') { - if (*pat == '?') { + if (*pat == '?') { /* Matches any single character */ str++; pat++; - } else - if (*str++ != *pat++) + continue; + } else if (*pat == '[') /* Character classes/Ranges */ + if (__match_charclass(pat + 1, *str, &pat)) { + str++; + continue; + } else return false; + else if (*pat == '\\') /* Escaped char match as normal char */ + pat++; + if (*str++ != *pat++) + return false; } /* Check wild card */ if (*pat == '*') { From 631c9def804b2c92b5cca04fb9ff7b5df9e35094 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 6 Jan 2010 09:45:34 -0500 Subject: [PATCH 047/640] perf probe: Support --line option to show probable source-code lines Add --line option to support showing probable source-code lines. perf probe --line SRC:LN[-LN|+NUM] or perf probe --line FUNC[:LN[-LN|+NUM]] This option shows source-code with line number if the line can be probed. Lines without line number (and blue color) means that the line can not be probed, because debuginfo doesn't have the information of those lines. The argument specifies the range of lines, "source.c:100-120" shows lines between 100th to 120th in source.c file.
And "func:10+20" shows 20 lines from 10th line of func function. e.g. # ./perf probe --line kernel/sched.c:1080 * * called with rq->lock held and irqs disabled */ static void hrtick_start(struct rq *rq, u64 delay) { struct hrtimer *timer = &rq->hrtick_timer; 1086 ktime_t time = ktime_add_ns(timer->base->get_time(), delay); hrtimer_set_expires(timer, time); 1090 if (rq == this_rq()) { 1091 hrtimer_restart(timer); 1092 } else if (!rq->hrtick_csd_pending) { 1093 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 1094 rq->hrtick_csd_pending = 1; If you specifying function name, this shows function-relative line number. # ./perf probe --line schedule asmlinkage void __sched schedule(void) 1 { struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; int cpu; need_resched: preempt_disable(); 9 cpu = smp_processor_id(); 10 rq = cpu_rq(cpu); 11 rcu_sched_qs(cpu); 12 prev = rq->curr; 13 switch_count = &prev->nivcsw; Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20100106144534.27218.77939.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-probe.txt | 20 +++ tools/perf/builtin-probe.c | 76 ++++++++-- tools/perf/util/probe-event.c | 100 +++++++++++++ tools/perf/util/probe-event.h | 2 + tools/perf/util/probe-finder.c | 191 +++++++++++++++++++++++- tools/perf/util/probe-finder.h | 31 ++++ 6 files changed, 402 insertions(+), 18 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 250e391b4bc8..2de34075f6a4 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -15,6 +15,8 @@ or 'perf probe' [options] --del='[GROUP:]EVENT' [...] 
or 'perf probe' --list +or +'perf probe' --line='FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]' DESCRIPTION ----------- @@ -45,6 +47,11 @@ OPTIONS --list:: List up current probe events. +-L:: +--line=:: + Show source code lines which can be probed. This needs an argument + which specifies a range of the source code. + PROBE SYNTAX ------------ Probe points are defined by following syntax. @@ -56,6 +63,19 @@ Probe points are defined by following syntax. It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number. 'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). +LINE SYNTAX +----------- +Line range is described by the following syntax. + + "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" + +FUNC specifies the function name of showing lines. 'RLN' is the start line +number from function entry line, and 'RLN2' is the end line number. As same as +probe syntax, 'SRC' means the source file path, 'ALN' is start line number, +and 'ALN2' is end line number in the file. It is also possible to specify how +many lines to show by using 'NUM'. +So, "source.c:100-120" shows lines between 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. 
+ SEE ALSO -------- linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index ffdd3fe87b4a..1d3a99ea5ce1 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -55,11 +55,13 @@ static struct { bool need_dwarf; bool list_events; bool force_add; + bool show_lines; int nr_probe; struct probe_point probes[MAX_PROBES]; struct strlist *dellist; struct perf_session *psession; struct map *kmap; + struct line_range line_range; } session; @@ -116,6 +118,15 @@ static int opt_del_probe_event(const struct option *opt __used, return 0; } +static int opt_show_lines(const struct option *opt __used, + const char *str, int unset __used) +{ + if (str) + parse_line_range_desc(str, &session.line_range); + INIT_LIST_HEAD(&session.line_range.line_list); + session.show_lines = true; + return 0; +} /* Currently just checking function name from symbol map */ static void evaluate_probe_point(struct probe_point *pp) { @@ -144,6 +155,7 @@ static const char * const probe_usage[] = { "perf probe [] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [] --del '[GROUP:]EVENT' ...", "perf probe --list", + "perf probe --line 'LINEDESC'", NULL }; @@ -182,9 +194,32 @@ static const struct option options[] = { opt_add_probe_event), OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events" " with existing name"), +#ifndef NO_LIBDWARF + OPT_CALLBACK('L', "line", NULL, + "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]", + "Show source code lines.", opt_show_lines), +#endif OPT_END() }; +/* Initialize symbol maps for vmlinux */ +static void init_vmlinux(void) +{ + symbol_conf.sort_by_name = true; + if (symbol_conf.vmlinux_name == NULL) + symbol_conf.try_vmlinux_path = true; + else + pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); + if (symbol__init() < 0) + die("Failed to init symbol map."); + session.psession = perf_session__new(NULL, O_WRONLY, false); + if (session.psession == NULL) + die("Failed to init 
perf_session."); + session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION]; + if (!session.kmap) + die("Could not find kernel map.\n"); +} + int cmd_probe(int argc, const char **argv, const char *prefix __used) { int i, ret; @@ -203,7 +238,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) parse_probe_event_argv(argc, argv); } - if ((!session.nr_probe && !session.dellist && !session.list_events)) + if ((!session.nr_probe && !session.dellist && !session.list_events && + !session.show_lines)) usage_with_options(probe_usage, options); if (debugfs_valid_mountpoint(debugfs_path) < 0) @@ -215,10 +251,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) " --add/--del.\n"); usage_with_options(probe_usage, options); } + if (session.show_lines) { + pr_warning(" Error: Don't use --list with --line.\n"); + usage_with_options(probe_usage, options); + } show_perf_probe_events(); return 0; } +#ifndef NO_LIBDWARF + if (session.show_lines) { + if (session.nr_probe != 0 || session.dellist) { + pr_warning(" Error: Don't use --line with" + " --add/--del.\n"); + usage_with_options(probe_usage, options); + } + init_vmlinux(); + fd = open_vmlinux(); + if (fd < 0) + die("Could not open debuginfo file."); + ret = find_line_range(fd, &session.line_range); + if (ret <= 0) + die("Source line is not found.\n"); + close(fd); + show_line_range(&session.line_range); + return 0; + } +#endif + if (session.dellist) { del_trace_kprobe_events(session.dellist); strlist__delete(session.dellist); @@ -226,18 +286,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) return 0; } - /* Initialize symbol maps for vmlinux */ - symbol_conf.sort_by_name = true; - if (symbol_conf.vmlinux_name == NULL) - symbol_conf.try_vmlinux_path = true; - if (symbol__init() < 0) - die("Failed to init symbol map."); - session.psession = perf_session__new(NULL, O_WRONLY, false); - if (session.psession == NULL) - die("Failed to init perf_session."); - 
session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION]; - if (!session.kmap) - die("Could not find kernel map.\n"); + /* Add probes */ + init_vmlinux(); if (session.need_dwarf) #ifdef NO_LIBDWARF diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a22141a773bc..71b0dd590a37 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -38,6 +38,7 @@ #include "strlist.h" #include "debug.h" #include "cache.h" +#include "color.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -63,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) return ret; } +void parse_line_range_desc(const char *arg, struct line_range *lr) +{ + const char *ptr; + char *tmp; + /* + * + * SRC:SLN[+NUM|-ELN] + * FUNC[:SLN[+NUM|-ELN]] + */ + ptr = strchr(arg, ':'); + if (ptr) { + lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0); + if (*tmp == '+') + lr->end = lr->start + (unsigned int)strtoul(tmp + 1, + &tmp, 0); + else if (*tmp == '-') + lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0); + else + lr->end = 0; + pr_debug("Line range is %u to %u\n", lr->start, lr->end); + if (lr->end && lr->start > lr->end) + semantic_error("Start line must be smaller" + " than end line."); + if (*tmp != '\0') + semantic_error("Tailing with invalid character '%d'.", + *tmp); + tmp = strndup(arg, (ptr - arg)); + } else + tmp = strdup(arg); + + if (strchr(tmp, '.')) + lr->file = tmp; + else + lr->function = tmp; +} + /* Check the name is good for event/group */ static bool check_event_name(const char *name) { @@ -678,3 +715,66 @@ void del_trace_kprobe_events(struct strlist *dellist) close(fd); } +#define LINEBUF_SIZE 256 + +static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num) +{ + char buf[LINEBUF_SIZE]; + const char *color = PERF_COLOR_BLUE; + + if (fgets(buf, LINEBUF_SIZE, fp) == NULL) + goto error; + if (!skip) { + if (show_num) + fprintf(stdout, "%7u %s", l, buf); + else + 
color_fprintf(stdout, color, " %s", buf); + } + + while (strlen(buf) == LINEBUF_SIZE - 1 && + buf[LINEBUF_SIZE - 2] != '\n') { + if (fgets(buf, LINEBUF_SIZE, fp) == NULL) + goto error; + if (!skip) { + if (show_num) + fprintf(stdout, "%s", buf); + else + color_fprintf(stdout, color, "%s", buf); + } + } + return; +error: + if (feof(fp)) + die("Source file is shorter than expected."); + else + die("File read error: %s", strerror(errno)); +} + +void show_line_range(struct line_range *lr) +{ + unsigned int l = 1; + struct line_node *ln; + FILE *fp; + + setup_pager(); + + if (lr->function) + fprintf(stdout, "<%s:%d>\n", lr->function, + lr->start - lr->offset); + else + fprintf(stdout, "<%s:%d>\n", lr->file, lr->start); + + fp = fopen(lr->path, "r"); + if (fp == NULL) + die("Failed to open %s: %s", lr->path, strerror(errno)); + /* Skip to starting line number */ + while (l < lr->start) + show_one_line(fp, l++, true, false); + + list_for_each_entry(ln, &lr->line_list, list) { + while (ln->line > l) + show_one_line(fp, (l++) - lr->offset, false, false); + show_one_line(fp, (l++) - lr->offset, false, true); + } + fclose(fp); +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 7f1d499118c0..711287d4baea 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -5,6 +5,7 @@ #include "probe-finder.h" #include "strlist.h" +extern void parse_line_range_desc(const char *arg, struct line_range *lr); extern void parse_perf_probe_event(const char *str, struct probe_point *pp, bool *need_dwarf); extern int synthesize_perf_probe_point(struct probe_point *pp); @@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes, bool force_add); extern void del_trace_kprobe_events(struct strlist *dellist); extern void show_perf_probe_events(void); +extern void show_line_range(struct line_range *lr); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git 
a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 6402798337c8..1b2124d12f68 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname) return found; } +static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf) +{ + Dwarf_Signed cnt, i; + char **srcs; + int ret = 0; + + if (!buf || !fno) + return -EINVAL; + + ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error); + if (ret == DW_DLV_OK) { + if ((Dwarf_Unsigned)cnt > fno - 1) { + *buf = strdup(srcs[fno - 1]); + ret = 0; + pr_debug("found filename: %s\n", *buf); + } else + ret = -ENOENT; + for (i = 0; i < cnt; i++) + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); + } else + ret = -EINVAL; + return ret; +} + /* Compare diename and tname */ static int die_compare_name(Dwarf_Die dw_die, const char *tname) { @@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data) } /* Find probe point from its line number */ -static void find_by_line(struct probe_finder *pf) +static void find_probe_point_by_line(struct probe_finder *pf) { Dwarf_Signed cnt, i, clm; Dwarf_Line *lines; @@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) pf->fno = die_get_decl_file(dlink->die); pf->lno = die_get_decl_line(dlink->die) + pp->line; - find_by_line(pf); + find_probe_point_by_line(pf); return 1; } if (die_inlined_subprogram(dlink->die)) { @@ -673,7 +698,7 @@ found: return 0; } -static void find_by_func(struct probe_finder *pf) +static void find_probe_point_by_func(struct probe_finder *pf) { search_die_from_children(pf->cu_die, probefunc_callback, pf); } @@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp) if (ret == DW_DLV_NO_ENTRY) pf.cu_base = 0; if (pp->function) - find_by_func(&pf); + find_probe_point_by_func(&pf); else { pf.lno = pp->line; - 
find_by_line(&pf); + find_probe_point_by_line(&pf); } } dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE); @@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp) return pp->found; } + +static void line_range_add_line(struct line_range *lr, unsigned int line) +{ + struct line_node *ln; + struct list_head *p; + + /* Reverse search, because new line will be the last one */ + list_for_each_entry_reverse(ln, &lr->line_list, list) { + if (ln->line < line) { + p = &ln->list; + goto found; + } else if (ln->line == line) /* Already exist */ + return ; + } + /* List is empty, or the smallest entry */ + p = &lr->line_list; +found: + pr_debug("Debug: add a line %u\n", line); + ln = zalloc(sizeof(struct line_node)); + DIE_IF(ln == NULL); + ln->line = line; + INIT_LIST_HEAD(&ln->list); + list_add(&ln->list, p); +} + +/* Find line range from its line number */ +static void find_line_range_by_line(struct line_finder *lf) +{ + Dwarf_Signed cnt, i; + Dwarf_Line *lines; + Dwarf_Unsigned lineno = 0; + Dwarf_Unsigned fno; + Dwarf_Addr addr; + int ret; + + ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + for (i = 0; i < cnt; i++) { + ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (fno != lf->fno) + continue; + + ret = dwarf_lineno(lines[i], &lineno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lf->lno_s > lineno || lf->lno_e < lineno) + continue; + + /* Filter line in the function address range */ + if (lf->addr_s && lf->addr_e) { + ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lf->addr_s > addr || lf->addr_e <= addr) + continue; + } + line_range_add_line(lf->lr, (unsigned int)lineno); + } + dwarf_srclines_dealloc(__dw_debug, lines, cnt); + if (!list_empty(&lf->lr->line_list)) + lf->found = 1; +} + +/* Search function from function name */ +static int linefunc_callback(struct die_link *dlink, void *data) +{ + struct line_finder *lf = (struct 
line_finder *)data; + struct line_range *lr = lf->lr; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram && + die_compare_name(dlink->die, lr->function) == 0) { + /* Get the address range of this function */ + ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error); + if (ret == DW_DLV_OK) + ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) { + lf->addr_s = 0; + lf->addr_e = 0; + } + + lf->fno = die_get_decl_file(dlink->die); + lr->offset = die_get_decl_line(dlink->die);; + lf->lno_s = lr->offset + lr->start; + if (!lr->end) + lf->lno_e = (Dwarf_Unsigned)-1; + else + lf->lno_e = lr->offset + lr->end; + lr->start = lf->lno_s; + lr->end = lf->lno_e; + find_line_range_by_line(lf); + /* If we find a target function, this should be end. */ + lf->found = 1; + return 1; + } + return 0; +} + +static void find_line_range_by_func(struct line_finder *lf) +{ + search_die_from_children(lf->cu_die, linefunc_callback, lf); +} + +int find_line_range(int fd, struct line_range *lr) +{ + Dwarf_Half addr_size = 0; + Dwarf_Unsigned next_cuh = 0; + int ret; + struct line_finder lf = {.lr = lr}; + + ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); + if (ret != DW_DLV_OK) + return -ENOENT; + + while (!lf.found) { + /* Search CU (Compilation Unit) */ + ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, + &addr_size, &next_cuh, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + break; + + /* Get the DIE(Debugging Information Entry) of this CU */ + ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + /* Check if target file is included. 
*/ + if (lr->file) + lf.fno = cu_find_fileno(lf.cu_die, lr->file); + + if (!lr->file || lf.fno) { + if (lr->function) + find_line_range_by_func(&lf); + else { + lf.lno_s = lr->start; + if (!lr->end) + lf.lno_e = (Dwarf_Unsigned)-1; + else + lf.lno_e = lr->end; + find_line_range_by_line(&lf); + } + /* Get the real file path */ + if (lf.found) + cu_get_filename(lf.cu_die, lf.fno, &lr->path); + } + dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE); + } + ret = dwarf_finish(__dw_debug, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + return lf.found; +} + diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index e3f396806e6e..972b386116f1 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -34,8 +34,26 @@ struct probe_point { char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ }; +/* Line number container */ +struct line_node { + struct list_head list; + unsigned int line; +}; + +/* Line range */ +struct line_range { + char *file; /* File name */ + char *function; /* Function name */ + unsigned int start; /* Start line number */ + unsigned int end; /* End line number */ + unsigned int offset; /* Start line offset */ + char *path; /* Real path name */ + struct list_head line_list; /* Visible lines */ +}; + #ifndef NO_LIBDWARF extern int find_probepoint(int fd, struct probe_point *pp); +extern int find_line_range(int fd, struct line_range *lr); /* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */ #ifndef _MIPS_SZLONG @@ -62,6 +80,19 @@ struct probe_finder { char *buf; /* Current output buffer */ int len; /* Length of output buffer */ }; + +struct line_finder { + struct line_range *lr; /* Target line range */ + + Dwarf_Unsigned fno; /* File number */ + Dwarf_Unsigned lno_s; /* Start line number */ + Dwarf_Unsigned lno_e; /* End line number */ + Dwarf_Addr addr_s; /* Start address */ + Dwarf_Addr addr_e; /* End address */ + Dwarf_Die cu_die; /* Current CU */ + int found; +}; + #endif /* NO_LIBDWARF 
*/ #endif /*_PROBE_FINDER_H */ From 8d9e503928638fc95317be42c416fb7907322aff Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Thu, 7 Jan 2010 19:40:47 +0300 Subject: [PATCH 048/640] perf: Fix memory leak: counterwidth Signed-off-by: Alexander Beregalov Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <1262882447-23776-2-git-send-email-a.beregalov@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/values.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 1c15e39f99e3..cfa55d686e3b 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -169,6 +169,7 @@ static void perf_read_values__display_pretty(FILE *fp, counterwidth[j], values->value[i][j]); fprintf(fp, "\n"); } + free(counterwidth); } static void perf_read_values__display_raw(FILE *fp, From fed5af61dc0d9402d26e7fb8fb9731a60a8e05ca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jan 2010 19:59:38 -0200 Subject: [PATCH 049/640] perf buildid-list: No need to process the header sections again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it is already processed by: perf_session__new perf_session__open perf_session__read This was harmless, because we use dsos__findnew, that would already find it, but is unnecessary work and removing it makes builtin-buildid-list.c even shorter. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262901583-8074-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-buildid-list.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 1e99ac806913..4229c2c213cc 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -31,26 +31,6 @@ static const struct option options[] = { OPT_END() }; -static int perf_file_section__process_buildids(struct perf_file_section *self, - int feat, int fd) -{ - if (feat != HEADER_BUILD_ID) - return 0; - - if (lseek(fd, self->offset, SEEK_SET) < 0) { - pr_warning("Failed to lseek to %Ld offset for buildids!\n", - self->offset); - return -1; - } - - if (perf_header__read_build_ids(fd, self->offset, self->size)) { - pr_warning("Failed to read buildids!\n"); - return -1; - } - - return 0; -} - static int __cmd_buildid_list(void) { int err = -1; @@ -60,10 +40,7 @@ static int __cmd_buildid_list(void) if (session == NULL) return -1; - err = perf_header__process_sections(&session->header, session->fd, - perf_file_section__process_buildids); - if (err >= 0) - dsos__fprintf_buildid(stdout); + dsos__fprintf_buildid(stdout); perf_session__delete(session); return err; From a89e5abe3efcc7facc666d3985769278937f86b0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jan 2010 19:59:39 -0200 Subject: [PATCH 050/640] perf symbols: Record the domain of DSOs in HEADER_BUILD_ID header table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we can restore them to the right DSO list (either dsos__kernel or dsos__user). We do that just like the kernel does for the other events, encoding PERF_RECORD_MISC_{KERNEL,USER} in perf_event_header. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262901583-8074-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 9 ++++++--- tools/perf/util/session.c | 6 +++++- tools/perf/util/symbol.c | 6 +++--- tools/perf/util/symbol.h | 11 +++++++++-- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 942f7da8bf84..ec96321eb9e4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -193,7 +193,7 @@ static int write_padded(int fd, const void *bf, size_t count, continue; \ else -static int __dsos__write_buildid_table(struct list_head *head, int fd) +static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd) { struct dso *pos; @@ -205,6 +205,7 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd) len = ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); + b.header.misc = misc; b.header.size = sizeof(b) + len; err = do_write(fd, &b, sizeof(b)); if (err < 0) @@ -220,9 +221,11 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd) static int dsos__write_buildid_table(int fd) { - int err = __dsos__write_buildid_table(&dsos__kernel, fd); + int err = __dsos__write_buildid_table(&dsos__kernel, + PERF_RECORD_MISC_KERNEL, fd); if (err == 0) - err = __dsos__write_buildid_table(&dsos__user, fd); + err = __dsos__write_buildid_table(&dsos__user, + PERF_RECORD_MISC_USER, fd); return err; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e0e6a075489e..378ac5422bcf 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -255,6 +255,7 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size) while (offset < limit) { struct dso *dso; ssize_t len; + struct list_head *head = &dsos__user; if (read(input, &bev, 
sizeof(bev)) != sizeof(bev)) goto out; @@ -263,7 +264,10 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size) if (read(input, filename, len) != len) goto out; - dso = dsos__findnew(filename); + if (bev.header.misc & PERF_RECORD_MISC_KERNEL) + head = &dsos__kernel; + + dso = __dsos__findnew(head, filename); if (dso != NULL) dso__set_build_id(dso, &bev.build_id); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index da2f07f1af8f..8e6627e6b778 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1615,14 +1615,14 @@ static struct dso *dsos__find(struct list_head *head, const char *name) return NULL; } -struct dso *dsos__findnew(const char *name) +struct dso *__dsos__findnew(struct list_head *head, const char *name) { - struct dso *dso = dsos__find(&dsos__user, name); + struct dso *dso = dsos__find(head, name); if (!dso) { dso = dso__new(name); if (dso != NULL) { - dsos__add(&dsos__user, dso); + dsos__add(head, dso); dso__set_basename(dso); } } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b2b5330a82a0..ee0b4593db7b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -115,9 +115,17 @@ bool dso__sorted_by_name(const struct dso *self, enum map_type type); void dso__sort_by_name(struct dso *self, enum map_type type); +extern struct list_head dsos__user, dsos__kernel; + +struct dso *__dsos__findnew(struct list_head *head, const char *name); + +static inline struct dso *dsos__findnew(const char *name) +{ + return __dsos__findnew(&dsos__user, name); +} + struct perf_session; -struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, struct perf_session *session, symbol_filter_t filter); void dsos__fprintf(FILE *fp); @@ -143,6 +151,5 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type); int perf_session__create_kernel_maps(struct perf_session *self); -extern struct list_head dsos__user, dsos__kernel; extern struct dso *vdso; #endif 
/* __PERF_SYMBOL */ From cf5531148ff34938840d6da775c0a4ace442d573 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jan 2010 19:59:40 -0200 Subject: [PATCH 051/640] perf tools: Create typedef for common event synthesizing callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1262901583-8074-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 16 +++++----------- tools/perf/util/event.h | 12 +++++------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1a31feb9999f..bfb3d872b9f5 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -8,8 +8,7 @@ #include "thread.h" static pid_t event__synthesize_comm(pid_t pid, int full, - int (*process)(event_t *event, - struct perf_session *session), + event__handler_t process, struct perf_session *session) { event_t ev; @@ -91,8 +90,7 @@ out_failure: } static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, - int (*process)(event_t *event, - struct perf_session *session), + event__handler_t process, struct perf_session *session) { char filename[PATH_MAX]; @@ -156,9 +154,7 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, return 0; } -int event__synthesize_thread(pid_t pid, - int (*process)(event_t *event, - struct perf_session *session), +int event__synthesize_thread(pid_t pid, event__handler_t process, struct perf_session *session) { pid_t tgid = event__synthesize_comm(pid, 1, process, session); @@ -167,8 +163,7 @@ int event__synthesize_thread(pid_t pid, return event__synthesize_mmap_events(pid, tgid, process, session); } -void event__synthesize_threads(int (*process)(event_t *event, - struct perf_session *session), +void event__synthesize_threads(event__handler_t process, struct 
perf_session *session) { DIR *proc; @@ -205,8 +200,7 @@ static int find_symbol_cb(void *arg, const char *name, char type, u64 start) return 1; } -int event__synthesize_kernel_mmap(int (*process)(event_t *event, - struct perf_session *session), +int event__synthesize_kernel_mmap(event__handler_t process, struct perf_session *session, const char *symbol_name) { diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 61fc0dc658c2..80356da8216c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -103,15 +103,13 @@ void event__print_totals(void); struct perf_session; -int event__synthesize_thread(pid_t pid, - int (*process)(event_t *event, - struct perf_session *session), +typedef int (*event__handler_t)(event_t *event, struct perf_session *session); + +int event__synthesize_thread(pid_t pid, event__handler_t process, struct perf_session *session); -void event__synthesize_threads(int (*process)(event_t *event, - struct perf_session *session), +void event__synthesize_threads(event__handler_t process, struct perf_session *session); -int event__synthesize_kernel_mmap(int (*process)(event_t *event, - struct perf_session *session), +int event__synthesize_kernel_mmap(event__handler_t process, struct perf_session *session, const char *symbol_name); From 8381f65d097dad90416808314737dd7d3ae38ea9 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 8 Jan 2010 15:27:33 +0000 Subject: [PATCH 052/640] sched/perf: Make sure irqs are disabled for perf_event_task_sched_in() perf_event_task_sched_in() expects interrupts to be disabled, but on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW defined, this isn't true. If this is defined, disable irqs around the call in finish_task_switch(). 
Signed-off-by: Jamie Iles Acked-by: Peter Zijlstra Cc: Russell King - ARM Linux LKML-Reference: <1262964453-27370-1-git-send-email-jamie.iles@picochip.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index e507af086b42..c3ad3427a2a5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2783,7 +2783,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_disable(); +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ perf_event_task_sched_in(current); +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_enable(); +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); fire_sched_in_preempt_notifiers(current); From ff314d3903c2843de65c2148f66f277f2440ed26 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Wed, 13 Jan 2010 17:01:38 +0800 Subject: [PATCH 053/640] perf: Make cmd_to_page() function more compact Remove branch for is_perf_command. 
Signed-off-by: Wenji Huang Cc: fweisbec@gmail.com Cc: jkacur@redhat.com Cc: acme@redhat.com LKML-Reference: <1263373298-13282-1-git-send-email-wenji.huang@oracle.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-help.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index e427d6965e0c..215b584007b1 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -313,8 +313,6 @@ static const char *cmd_to_page(const char *perf_cmd) return "perf"; else if (!prefixcmp(perf_cmd, "perf")) return perf_cmd; - else if (is_perf_command(perf_cmd)) - return prepend("perf-", perf_cmd); else return prepend("perf-", perf_cmd); } From 292e0041c3b22c5347092152504d814119554b57 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 9 Dec 2009 06:56:40 -0500 Subject: [PATCH 054/640] [CPUFREQ] fix default value for ondemand governor Signed-off-by: Mike Frysinger Signed-off-by: Dave Jones --- Documentation/cpu-freq/governors.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index aed082f49d09..737988fca64d 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -145,8 +145,8 @@ show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. up_threshold: defines what the average CPU usage between the samplings of 'sampling_rate' needs to be for the kernel to make a decision on whether it should increase the frequency. For example when it is set -to its default value of '80' it means that between the checking -intervals the CPU needs to be on average more than 80% in use to then +to its default value of '95' it means that between the checking +intervals the CPU needs to be on average more than 95% in use to then decide that the CPU frequency needs to be increased. ignore_nice_load: this parameter takes a value of '0' or '1'. 
When From 557a701c16553b0b691dbb64ef30361115a80f64 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 14 Dec 2009 11:44:15 +0100 Subject: [PATCH 055/640] [CPUFREQ] Fix use after free of struct powernow_k8_data Easy fix for a regression introduced in 2.6.31. On managed CPUs the cpufreq.c core will call driver->exit(cpu) on the managed cpus and powernow_k8 will free the core's data. Later driver->get(cpu) function might get called trying to read out the current freq of a managed cpu and the NULL pointer check does not work on the freed object -> better set it to NULL. ->get() is unsigned and must return 0 as invalid frequency. Reference: http://bugzilla.kernel.org/show_bug.cgi?id=14391 Signed-off-by: Thomas Renninger Tested-by: Michal Schmidt CC: stable@kernel.org Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..2da4fa3bf6e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) kfree(data->powernow_table); kfree(data); + per_cpu(powernow_data, pol->cpu) = NULL; return 0; } @@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu) int err; if (!data) - return -EINVAL; + return 0; smp_call_function_single(cpu, query_values_on_cpu, &err, true); if (err) From 1dbf58881f307e21a3df4b990a5bea401360d02e Mon Sep 17 00:00:00 2001 From: "Nagananda.Chumbalkar@hp.com" Date: Mon, 21 Dec 2009 23:40:52 +0100 Subject: [PATCH 056/640] [CPUFREQ] Fix ondemand to not request targets outside policy limits Dominik said: target_freq cannot be below policy->min or above policy->max. If it were, the whole cpufreq subsystem is broken. But (answer): I think the "ondemand" governor can ask for a target frequency that is below policy->min. 
... A patch such as below may be needed to sanitize the target frequency requested by "ondemand". The "conservative" governor already has this check: Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones # diff -bur x/drivers/cpufreq/cpufreq_ondemand.c.orig y/drivers/cpufreq/cpufreq_ondemand.c --- drivers/cpufreq/cpufreq_ondemand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4b34ade2332b..bd444dc93cf2 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -554,6 +554,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential); + if (freq_next < policy->min) + freq_next = policy->min; + if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); From b7cece76783c68fb391f9882235b4b0c9c300c46 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Jan 2010 13:22:17 -0200 Subject: [PATCH 057/640] perf tools: Encode kernel module mappings in perf.data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were always looking at the running machine /proc/modules, even when processing a perf.data file, which only makes sense when we're doing 'perf record' and 'perf report' on the same machine, and in close succession, or if we don't use modules at all, right Peter? ;-) Now, at 'perf record' time we read /proc/modules, find the long path for modules, and put them as PERF_MMAP events, just like we did to encode the reloc reference symbol for vmlinux. Talking about that now it is encoded in .pgoff, so that we can use .{start,len} to store the address boundaries for the kernel so that when we reconstruct the kmaps tree we can do lookups right away, without having to fixup the end of the kernel maps like we did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally be able to collect data in one machine and analyse in another. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 5 ++ tools/perf/builtin-record.c | 11 ++++ tools/perf/builtin-top.c | 5 ++ tools/perf/util/event.c | 106 ++++++++++++++++++++++++++++---- tools/perf/util/event.h | 2 + tools/perf/util/session.c | 8 +-- tools/perf/util/symbol.c | 116 ++++++++++++++++++++++++++---------- tools/perf/util/symbol.h | 3 + tools/perf/util/thread.h | 4 ++ 9 files changed, 212 insertions(+), 48 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 88c570c18e3e..4af7199c5af7 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -494,6 +494,11 @@ static int __cmd_kmem(void) if (!perf_session__has_traces(session, "kmem record")) goto out_delete; + if (perf_session__create_kernel_maps(session) < 0) { + pr_err("Problems creating kernel maps\n"); + return -1; + } + setup_pager(); err = perf_session__process_events(session, &event_ops); if (err != 0) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8f88420e066b..c130df2676f1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -465,6 +465,11 @@ static int __cmd_record(int argc, const char **argv) return -1; } + if (perf_session__create_kernel_maps(session) < 0) { + pr_err("Problems creating kernel maps\n"); + return -1; + } + if (!file_new) { err = perf_header__read(&session->header, output); if (err < 0) @@ -558,6 +563,12 @@ static int __cmd_record(int argc, const char **argv) return err; } + err = event__synthesize_modules(process_synthesized_event, session); + if (err < 0) { + pr_err("Couldn't record kernel reference relocation symbol.\n"); + return err; + } + if 
(!system_wide && profile_cpu == -1) event__synthesize_thread(pid, process_synthesized_event, session); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ddc584b64871..6822b44ca4f9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1165,6 +1165,11 @@ static int __cmd_top(void) if (session == NULL) return -ENOMEM; + if (perf_session__create_kernel_maps(session) < 0) { + pr_err("Problems creating kernel maps\n"); + return -1; + } + if (target_pid != -1) event__synthesize_thread(target_pid, event__process, session); else diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bfb3d872b9f5..4f3e7ef33b83 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -154,6 +154,36 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, return 0; } +int event__synthesize_modules(event__handler_t process, + struct perf_session *session) +{ + struct rb_node *nd; + + for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]); + nd; nd = rb_next(nd)) { + event_t ev; + size_t size; + struct map *pos = rb_entry(nd, struct map, rb_node); + + if (pos->dso->kernel) + continue; + + size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + memset(&ev, 0, sizeof(ev)); + ev.mmap.header.type = PERF_RECORD_MMAP; + ev.mmap.header.size = (sizeof(ev.mmap) - + (sizeof(ev.mmap.filename) - size)); + ev.mmap.start = pos->start; + ev.mmap.len = pos->end - pos->start; + + memcpy(ev.mmap.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + process(&ev, session); + } + + return 0; +} + int event__synthesize_thread(pid_t pid, event__handler_t process, struct perf_session *session) { @@ -222,7 +252,9 @@ int event__synthesize_kernel_mmap(event__handler_t process, "[kernel.kallsyms.%s]", symbol_name) + 1; size = ALIGN(size, sizeof(u64)); ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size)); - ev.mmap.start = args.start; + ev.mmap.pgoff = args.start; + ev.mmap.start = 
session->vmlinux_maps[MAP__FUNCTION]->start; + ev.mmap.len = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start ; return process(&ev, session); } @@ -280,7 +312,6 @@ int event__process_mmap(event_t *self, struct perf_session *session) { struct thread *thread; struct map *map; - static const char kmmap_prefix[] = "[kernel.kallsyms."; dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", self->mmap.pid, self->mmap.tid, @@ -289,13 +320,61 @@ int event__process_mmap(event_t *self, struct perf_session *session) (void *)(long)self->mmap.pgoff, self->mmap.filename); - if (self->mmap.pid == 0 && - memcmp(self->mmap.filename, kmmap_prefix, - sizeof(kmmap_prefix) - 1) == 0) { - const char *symbol_name = (self->mmap.filename + - sizeof(kmmap_prefix) - 1); - perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name, - self->mmap.start); + if (self->mmap.pid == 0) { + static const char kmmap_prefix[] = "[kernel.kallsyms."; + + if (self->mmap.filename[0] == '/') { + char short_module_name[1024]; + char *name = strrchr(self->mmap.filename, '/'), *dot; + + if (name == NULL) + goto out_problem; + + ++name; /* skip / */ + dot = strrchr(name, '.'); + if (dot == NULL) + goto out_problem; + + snprintf(short_module_name, sizeof(short_module_name), + "[%.*s]", (int)(dot - name), name); + strxfrchar(short_module_name, '-', '_'); + + map = perf_session__new_module_map(session, + self->mmap.start, + short_module_name); + if (map == NULL) + goto out_problem; + + name = strdup(self->mmap.filename); + if (name == NULL) + goto out_problem; + + dso__set_long_name(map->dso, name); + map->end = map->start + self->mmap.len; + } else if (memcmp(self->mmap.filename, kmmap_prefix, + sizeof(kmmap_prefix) - 1) == 0) { + const char *symbol_name = (self->mmap.filename + + sizeof(kmmap_prefix) - 1); + /* + * Should be there already, from the build-id table in + * the header. 
+ */ + struct dso *kernel = __dsos__findnew(&dsos__kernel, + "[kernel.kallsyms]"); + if (kernel == NULL) + goto out_problem; + + if (__map_groups__create_kernel_maps(&session->kmaps, + session->vmlinux_maps, + kernel) < 0) + goto out_problem; + + session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start; + session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; + + perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name, + self->mmap.pgoff); + } return 0; } @@ -304,10 +383,13 @@ int event__process_mmap(event_t *self, struct perf_session *session) session->cwd, session->cwdlen); if (thread == NULL || map == NULL) - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - else - thread__insert_map(thread, map); + goto out_problem; + thread__insert_map(thread, map); + return 0; + +out_problem: + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 80356da8216c..50a7132887f5 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -112,6 +112,8 @@ void event__synthesize_threads(event__handler_t process, int event__synthesize_kernel_mmap(event__handler_t process, struct perf_session *session, const char *symbol_name); +int event__synthesize_modules(event__handler_t process, + struct perf_session *session); int event__process_comm(event_t *self, struct perf_session *session); int event__process_lost(event_t *self, struct perf_session *session); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 378ac5422bcf..fd1c5a39a5bb 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -69,9 +69,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc self->unknown_events = 0; map_groups__init(&self->kmaps); - if (perf_session__create_kernel_maps(self) < 0) - goto out_delete; - if (mode == O_RDONLY && perf_session__open(self, force) < 0) goto out_delete; 
@@ -268,8 +265,11 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size) head = &dsos__kernel; dso = __dsos__findnew(head, filename); - if (dso != NULL) + if (dso != NULL) { dso__set_build_id(dso, &bev.build_id); + if (head == &dsos__kernel && filename[0] == '[') + dso->kernel = 1; + } offset += bev.header.size; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8e6627e6b778..381999dd5c1f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -161,7 +161,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) self->start, self->end, self->name); } -static void dso__set_long_name(struct dso *self, char *name) +void dso__set_long_name(struct dso *self, char *name) { if (name == NULL) return; @@ -176,7 +176,7 @@ static void dso__set_basename(struct dso *self) struct dso *dso__new(const char *name) { - struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); + struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1); if (self != NULL) { int i; @@ -500,13 +500,17 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, *module++ = '\0'; - if (strcmp(self->name, module)) { + if (strcmp(curr_map->dso->short_name, module)) { curr_map = map_groups__find_by_name(&session->kmaps, map->type, module); if (curr_map == NULL) { pr_debug("/proc/{kallsyms,modules} " - "inconsistency!\n"); + "inconsistency while looking " + "for \"%s\" module!\n", module); return -1; } + + if (curr_map->dso->loaded) + goto discard_symbol; } /* * So that we look just like we get from .ko files, @@ -1343,13 +1347,33 @@ struct map *map_groups__find_by_name(struct map_groups *self, for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { struct map *map = rb_entry(nd, struct map, rb_node); - if (map->dso && strcmp(map->dso->name, name) == 0) + if (map->dso && strcmp(map->dso->short_name, name) == 0) return map; } return NULL; } +static int dso__kernel_module_get_build_id(struct dso *self) +{ + char filename[PATH_MAX]; 
+ /* + * kernel module short names are of the form "[module]" and + * we need just "module" here. + */ + const char *name = self->short_name + 1; + + snprintf(filename, sizeof(filename), + "/sys/module/%.*s/notes/.note.gnu.build-id", + (int)strlen(name - 1), name); + + if (sysfs__read_build_id(filename, self->build_id, + sizeof(self->build_id)) == 0) + self->has_build_id = true; + + return 0; +} + static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname) { struct dirent *dent; @@ -1395,6 +1419,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d if (long_name == NULL) goto failure; dso__set_long_name(map->dso, long_name); + dso__kernel_module_get_build_id(map->dso); } } @@ -1437,6 +1462,24 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return self; } +struct map *perf_session__new_module_map(struct perf_session *self, u64 start, + const char *filename) +{ + struct map *map; + struct dso *dso = __dsos__findnew(&dsos__kernel, filename); + + if (dso == NULL) + return NULL; + + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) + return NULL; + + dso->origin = DSO__ORIG_KMODULE; + map_groups__insert(&self->kmaps, map); + return map; +} + static int perf_session__create_module_maps(struct perf_session *self) { char *line = NULL; @@ -1450,7 +1493,6 @@ static int perf_session__create_module_maps(struct perf_session *self) while (!feof(file)) { char name[PATH_MAX]; u64 start; - struct dso *dso; char *sep; int line_len; @@ -1476,26 +1518,10 @@ static int perf_session__create_module_maps(struct perf_session *self) *sep = '\0'; snprintf(name, sizeof(name), "[%s]", line); - dso = dso__new(name); - - if (dso == NULL) + map = perf_session__new_module_map(self, start, name); + if (map == NULL) goto out_delete_line; - - map = map__new2(start, dso, MAP__FUNCTION); - if (map == NULL) { - dso__delete(dso); - goto out_delete_line; - } - - snprintf(name, sizeof(name), - 
"/sys/module/%s/notes/.note.gnu.build-id", line); - if (sysfs__read_build_id(name, dso->build_id, - sizeof(dso->build_id)) == 0) - dso->has_build_id = true; - - dso->origin = DSO__ORIG_KMODULE; - map_groups__insert(&self->kmaps, map); - dsos__add(&dsos__kernel, dso); + dso__kernel_module_get_build_id(map->dso); } free(line); @@ -1573,10 +1599,28 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, } } + /* + * Say the kernel DSO was created when processing the build-id header table, + * we have a build-id, so check if it is the same as the running kernel, + * using it if it is. + */ + if (self->has_build_id) { + u8 kallsyms_build_id[BUILD_ID_SIZE]; + + if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, + sizeof(kallsyms_build_id)) == 0) + + is_kallsyms = dso__build_id_equal(self, kallsyms_build_id); + if (is_kallsyms) + goto do_kallsyms; + goto do_vmlinux; + } + is_kallsyms = self->long_name[0] == '['; if (is_kallsyms) goto do_kallsyms; +do_vmlinux: err = dso__load_vmlinux(self, map, session, self->long_name, filter); if (err <= 0) { pr_info("The file %s cannot be used, " @@ -1694,16 +1738,12 @@ out_delete_kernel_dso: return NULL; } -static int map_groups__create_kernel_maps(struct map_groups *self, - struct map *vmlinux_maps[MAP__NR_TYPES], - const char *vmlinux) +int __map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES], + struct dso *kernel) { - struct dso *kernel = dsos__create_kernel(vmlinux); enum map_type type; - if (kernel == NULL) - return -1; - for (type = 0; type < MAP__NR_TYPES; ++type) { vmlinux_maps[type] = map__new2(0, kernel, type); if (vmlinux_maps[type] == NULL) @@ -1717,6 +1757,18 @@ static int map_groups__create_kernel_maps(struct map_groups *self, return 0; } +static int map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES], + const char *vmlinux) +{ + struct dso *kernel = dsos__create_kernel(vmlinux); + + if (kernel == NULL) 
+ return -1; + + return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel); +} + static void vmlinux_path__exit(void) { while (--vmlinux_path__nr_entries >= 0) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ee0b4593db7b..594156e43b10 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -134,6 +134,7 @@ size_t dsos__fprintf_buildid(FILE *fp); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *self); +void dso__set_long_name(struct dso *self, char *name); void dso__set_build_id(struct dso *self, void *build_id); struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, @@ -151,5 +152,7 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type); int perf_session__create_kernel_maps(struct perf_session *self); +struct map *perf_session__new_module_map(struct perf_session *self, u64 start, + const char *filename); extern struct dso *vdso; #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index c206f72c8881..c06c13535a70 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -67,4 +67,8 @@ map_groups__find_function(struct map_groups *self, struct perf_session *session, struct map *map_groups__find_by_name(struct map_groups *self, enum map_type type, const char *name); + +int __map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES], + struct dso *kernel); #endif /* __PERF_THREAD_H */ From 0895cf0a823e03ea6d79736611e90186006c805e Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 13 Jan 2010 13:22:18 -0200 Subject: [PATCH 058/640] perf: Fix few typos + cosmetics Signed-off-by: Kirill Smelkov Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: 
<1263396139-4798-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf.txt | 2 +- tools/perf/design.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 69c832557199..0eeb247dc7d2 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -Performance counters for Linux are are a new kernel-based subsystem +Performance counters for Linux are a new kernel-based subsystem that provide a framework for all things performance analysis. It covers hardware level (CPU/PMU, Performance Monitoring Unit) features and software features (software counters, tracepoints) as well. diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 8d0de5130db3..bd0bb1b1279b 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -101,10 +101,10 @@ enum hw_event_ids { */ PERF_COUNT_HW_CPU_CYCLES = 0, PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_REFERENCES = 2, PERF_COUNT_HW_CACHE_MISSES = 3, PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, }; @@ -131,8 +131,8 @@ software events, selected by 'event_id': */ enum sw_event_ids { PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, PERF_COUNT_SW_CONTEXT_SWITCHES = 3, PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, From 66aeb6d5cb701aedd508187e08612bfd1e108e2e Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 13 Jan 2010 13:22:19 -0200 Subject: [PATCH 059/640] perf top: Fix code typo in prompt_symbol() sym_filter is what was (if ever) passed with -s option. What was typed by user, and what we were looking for, is in buf. 
Signed-off-by: Kirill Smelkov Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1263396139-4798-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6822b44ca4f9..7a8a77ec2c9d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -667,7 +667,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) } if (!found) { - fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); + fprintf(stderr, "Sorry, %s is not active.\n", buf); sleep(1); return; } else From 0d755034dbd01e240eadf2d31f4f75d3088ccd21 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 12:23:09 -0200 Subject: [PATCH 060/640] perf tools: Don't cast RIP to pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since they can come from another architecture with bigger pointers, i.e. processing a 64-bit perf.data on a 32-bit arch. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263478990-8200-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-diff.c | 4 ++-- tools/perf/builtin-kmem.c | 7 ++----- tools/perf/builtin-report.c | 7 ++----- tools/perf/builtin-sched.c | 7 ++----- tools/perf/builtin-trace.c | 7 ++----- tools/perf/util/event.c | 9 +++------ tools/perf/util/session.c | 16 ++++++---------- 8 files changed, 21 insertions(+), 40 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 117bbae844bf..73c202ee0882 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -132,8 +132,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) { struct addr_location al; - dump_printf("(IP, %d): %d: %p\n", event->header.misc, - event->ip.pid, (void *)(long)event->ip.ip); + dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, + event->ip.pid, event->ip.ip); if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 924bfb77a6ab..18b3f505f9db 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -42,8 +42,8 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi struct addr_location al; struct sample_data data = { .period = 1, }; - dump_printf("(IP, %d): %d: %p\n", event->header.misc, - event->ip.pid, (void *)(long)event->ip.ip); + dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, + event->ip.pid, event->ip.ip); if (event__preprocess_sample(event, session, &al, NULL) < 0) { pr_warning("problem processing %d event, skipping it.\n", diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4af7199c5af7..7323d9dfbce8 100644 --- 
a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -316,11 +316,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) event__parse_sample(event, session->sample_type, &data); - dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", - event->header.misc, - data.pid, data.tid, - (void *)(long)data.ip, - (long long)data.period); + dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, + data.pid, data.tid, data.ip, data.period); thread = perf_session__findnew(session, event->ip.pid); if (thread == NULL) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 80d691a4191f..4c3d6997995b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -93,11 +93,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) event__parse_sample(event, session->sample_type, &data); - dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", - event->header.misc, - data.pid, data.tid, - (void *)(long)data.ip, - (long long)data.period); + dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, + data.pid, data.tid, data.ip, data.period); if (session->sample_type & PERF_SAMPLE_CALLCHAIN) { unsigned int i; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 702322f8fec1..4f5a03e43444 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1621,11 +1621,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) event__parse_sample(event, session->sample_type, &data); - dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", - event->header.misc, - data.pid, data.tid, - (void *)(long)data.ip, - (long long)data.period); + dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, + data.pid, data.tid, data.ip, data.period); thread = perf_session__findnew(session, data.pid); if (thread == NULL) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1831434aa938..8e9cbfe608d6 
100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -75,11 +75,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) event__parse_sample(event, session->sample_type, &data); - dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", - event->header.misc, - data.pid, data.tid, - (void *)(long)data.ip, - (long long)data.period); + dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, + data.pid, data.tid, data.ip, data.period); thread = perf_session__findnew(session, event->ip.pid); if (thread == NULL) { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4f3e7ef33b83..24ec5be4a1c0 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -313,12 +313,9 @@ int event__process_mmap(event_t *self, struct perf_session *session) struct thread *thread; struct map *map; - dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", - self->mmap.pid, self->mmap.tid, - (void *)(long)self->mmap.start, - (void *)(long)self->mmap.len, - (void *)(long)self->mmap.pgoff, - self->mmap.filename); + dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n", + self->mmap.pid, self->mmap.tid, self->mmap.start, + self->mmap.len, self->mmap.pgoff, self->mmap.filename); if (self->mmap.pid == 0) { static const char kmmap_prefix[] = "[kernel.kallsyms."; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fd1c5a39a5bb..e3ccdb46d6c4 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -209,9 +209,8 @@ static int perf_session__process_event(struct perf_session *self, trace_event(event); if (event->header.type < PERF_RECORD_MAX) { - dump_printf("%p [%p]: PERF_RECORD_%s", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("%#lx [%#x]: PERF_RECORD_%s", + offset + head, event->header.size, event__name[event->header.type]); ++event__total[0]; ++event__total[event->header.type]; @@ -362,16 +361,13 @@ more: size = event->header.size; - dump_printf("\n%p [%p]: event: 
%d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); + dump_printf("\n%#lx [%#x]: event: %d\n", + offset + head, event->header.size, event->header.type); if (size == 0 || perf_session__process_event(self, event, ops, offset, head) < 0) { - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("%#lx [%#x]: skipping unknown header type: %d\n", + offset + head, event->header.size, event->header.type); /* * assume we lost track of the stream, check alignment, and From ba21594cddee0a3af582971656702b1c4509d8f5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 12:23:10 -0200 Subject: [PATCH 061/640] perf tools: Cross platform perf.data analysis support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are still some problems related to loading vmlinux files, but those are unrelated to the feature implemented in this patch, so will get fixed in the next patches, but here are some results: 1. collect perf.data file on a Fedora 12 machine, x86_64, 64-bit userland 2. 
transfer it to a Debian Testing machine, PARISC64, 32-bit userland acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | head -5 74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms] 55fdd56670453ea66c011158c4b9d30179c1d049 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/ipt_MASQUERADE.ko 41adff63c730890480980d5d8ba513f1c216a858 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/iptable_nat.ko 90a33def1077bb8e97b8a78546dc96c2de62df46 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/nf_nat.ko 984c7bea90ce1376d5c8e7ef43a781801286e62d /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/tun.ko acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | tail -5 22492f3753c6a67de5c7ccbd6b863390c92c0723 /usr/lib64/libXt.so.6.0.0 353802bb7e1b895ba43507cc678f951e778e4c6f /usr/lib64/libMagickCore.so.2.0.0 d10c2897558595efe7be8b0584cf7e6398bc776c /usr/lib64/libfprint.so.0.0.0 a83ecfb519a788774a84d5ddde633c9ba56c03ab /home/acme/bin/perf d3ca765a8ecf257d263801d7ad8c49c189082317 /usr/lib64/libdwarf.so.0.0 acme@parisc:~/git/linux-2.6-tip$ acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm The file [kernel.kallsyms] cannot be used, trying to use /proc/kallsyms... ^^^^ The problem related to vmlinux handling, it shouldn't be trying this ^^^^ rather alien /proc/kallsyms at all... 
/lib64/libpthread-2.10.2.so with build id 5c68f7afeb33309c78037e374b0deee84dd441f6 not found, continuing without symbols /lib64/libc-2.10.2.so with build id eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 not found, continuing without symbols /home/acme/bin/perf with build id a83ecfb519a788774a84d5ddde633c9ba56c03ab not found, continuing without symbols /usr/sbin/openvpn with build id f2037a091ef36b591187a858d75e203690ea9409 not found, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko, continuing without symbols Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwlcore.ko, continuing without symbols # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 acme@parisc:~/git/linux-2.6-tip$ Which matches what we get when running the same command for the same perf.data file on the F12, x86_64, source machine: [root@doppio linux-2.6-tip]# perf report --sort comm # Samples: 293085637 # # Overhead Command # ........ ............... # 61.70% find 23.50% perf 5.86% swapper 3.12% sshd 2.39% init 0.87% bash 0.86% sleep 0.59% dbus-daemon 0.25% hald 0.24% NetworkManager 0.19% hald-addon-rfki 0.15% openvpn 0.07% phy0 0.07% events/0 0.05% iwl3945 0.05% events/1 0.03% kondemand/0 [root@doppio linux-2.6-tip]# The other modes work as well, modulo the problem with vmlinux: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object # ........ ............... ................................. 
# 35.11% find ffffffff81002b5a 18.25% perf ffffffff8102235f 16.17% find libc-2.10.2.so 9.07% find find 5.80% swapper ffffffff8102235f 3.95% perf libc-2.10.2.so 2.33% init ffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k 1.35% find [e1000e] 0.68% sleep libc-2.10.2.so acme@parisc:~/git/linux-2.6-tip$ And the lack of the right buildids: acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso,symbol 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object Symbol # ........ ............... ................................. ...... # 35.11% find ffffffff81002b5a [k] 0xffffffff81002b5a 18.25% perf ffffffff8102235f [k] 0xffffffff8102235f 16.17% find libc-2.10.2.so [.] 0x00000000045782 9.07% find find [.] 0x0000000000fb0e 5.80% swapper ffffffff8102235f [k] 0xffffffff8102235f 3.95% perf libc-2.10.2.so [.] 0x0000000007f398 2.33% init ffffffff810091b9 [k] 0xffffffff810091b9 1.65% sshd libcrypto.so.0.9.8k [.] 0x00000000105440 1.35% find [e1000e] [k] 0x00000000010948 0.68% sleep libc-2.10.2.so [.] 0x0000000011ad5b acme@parisc:~/git/linux-2.6-tip$ But if we: acme@parisc:~/git/linux-2.6-tip$ ls ~/.debug ls: cannot access /home/acme/.debug: No such file or directory acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/lib64/libc-2.10.2.so/ acme@parisc:~/git/linux-2.6-tip$ scp doppio:.debug/lib64/libc-2.10.2.so/* ~/.debug/lib64/libc-2.10.2.so/ acme@doppio's password: eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 100% 1783KB 714.7KB/s 00:02 acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/.build-id/eb acme@parisc:~/git/linux-2.6-tip$ ln -s ../../lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 ~/.debug/.build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 acme@parisc:~/git/linux-2.6-tip$ perf report --dsos libc-2.10.2.so 2> /dev/null # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] 
_IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # acme@parisc:~/git/linux-2.6-tip$ Which matches what we get on the source, F12, x86_64 machine: [root@doppio linux-2.6-tip]# perf report --dsos libc-2.10.2.so # dso: libc-2.10.2.so # Samples: 64281170 # # Overhead Command Symbol # ........ ............... ...... # 14.98% perf [.] __GI_strcmp 12.30% find [.] __GI_memmove 9.25% find [.] _int_malloc 7.60% find [.] _IO_vfprintf_internal 6.10% find [.] _IO_new_file_xsputn 6.02% find [.] __GI_close 3.08% find [.] _IO_file_overflow_internal 3.08% find [.] malloc_consolidate 3.08% find [.] _int_free 3.08% find [.] __strchrnul 3.08% find [.] __getdents64 3.08% find [.] __write_nocancel 3.08% sleep [.] __GI__dl_addr 3.08% sshd [.] __libc_select 3.08% find [.] _IO_new_file_write 3.07% find [.] _IO_new_do_write 3.06% find [.] __GI___errno_location 3.05% find [.] __GI___libc_malloc 3.04% perf [.] __GI_memcpy 1.71% find [.] __fprintf_chk 1.29% bash [.] __gconv_transform_utf8_internal 0.79% dbus-daemon [.] __GI_strlen # # (For a higher level overview, try: perf report --sort comm,dso) # [root@doppio linux-2.6-tip]# So I think this is really, really nice in that it demonstrates the portability of perf.data files and the use of build-ids across such alien worlds :-) There are some things to fix though, like the bitmap on the header, but things are looking good.
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263478990-8200-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 63 +++++++++++++++++----- tools/perf/util/header.h | 2 + tools/perf/util/session.c | 108 ++++++++++++++++++++++++++++++++++---- tools/perf/util/session.h | 7 ++- 4 files changed, 157 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ec96321eb9e4..b31e0ae4b8db 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,8 +1,10 @@ #include +#include #include #include #include #include +#include #include "util.h" #include "header.h" @@ -464,8 +466,21 @@ static int do_read(int fd, void *buf, size_t size) return 0; } +static int perf_header__getbuffer64(struct perf_header *self, + int fd, void *buf, size_t size) +{ + if (do_read(fd, buf, size)) + return -1; + + if (self->needs_swap) + mem_bswap_64(buf, size); + + return 0; +} + int perf_header__process_sections(struct perf_header *self, int fd, int (*process)(struct perf_file_section *self, + struct perf_header *ph, int feat, int fd)) { struct perf_file_section *feat_sec; @@ -486,7 +501,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, lseek(fd, self->data_offset + self->data_size, SEEK_SET); - if (do_read(fd, feat_sec, sec_size)) + if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) goto out_free; err = 0; @@ -494,7 +509,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, if (perf_header__has_feat(self, feat)) { struct perf_file_section *sec = &feat_sec[idx++]; - err = process(sec, feat, fd); + err = process(sec, self, feat, fd); if (err < 0) break; } @@ -511,10 +526,20 @@ int perf_file_header__read(struct perf_file_header *self, lseek(fd, 0, SEEK_SET); if (do_read(fd, self, sizeof(*self)) || - self->magic != PERF_MAGIC || - self->attr_size != sizeof(struct 
perf_file_attr)) + memcmp(&self->magic, __perf_magic, sizeof(self->magic))) return -1; + if (self->attr_size != sizeof(struct perf_file_attr)) { + u64 attr_size = bswap_64(self->attr_size); + + if (attr_size != sizeof(struct perf_file_attr)) + return -1; + + mem_bswap_64(self, offsetof(struct perf_file_header, + adds_features)); + ph->needs_swap = true; + } + if (self->size != sizeof(*self)) { /* Support the previous format */ if (self->size == offsetof(typeof(*self), adds_features)) @@ -524,16 +549,28 @@ int perf_file_header__read(struct perf_file_header *self, } memcpy(&ph->adds_features, &self->adds_features, - sizeof(self->adds_features)); + sizeof(ph->adds_features)); + /* + * FIXME: hack that assumes that if we need swap the perf.data file + * may be coming from an arch with a different word-size, ergo different + * DEFINE_BITMAP format, investigate more later, but for now its mostly + * safe to assume that we have a build-id section. Trace files probably + * have several other issues in this realm anyway... 
+ */ + if (ph->needs_swap) { + memset(&ph->adds_features, 0, sizeof(ph->adds_features)); + perf_header__set_feat(ph, HEADER_BUILD_ID); + } ph->event_offset = self->event_types.offset; - ph->event_size = self->event_types.size; - ph->data_offset = self->data.offset; + ph->event_size = self->event_types.size; + ph->data_offset = self->data.offset; ph->data_size = self->data.size; return 0; } static int perf_file_section__process(struct perf_file_section *self, + struct perf_header *ph, int feat, int fd) { if (lseek(fd, self->offset, SEEK_SET) < 0) { @@ -548,7 +585,7 @@ static int perf_file_section__process(struct perf_file_section *self, break; case HEADER_BUILD_ID: - if (perf_header__read_build_ids(fd, self->offset, self->size)) + if (perf_header__read_build_ids(ph, fd, self->offset, self->size)) pr_debug("Failed to read buildids, continuing...\n"); break; default: @@ -560,7 +597,7 @@ static int perf_file_section__process(struct perf_file_section *self, int perf_header__read(struct perf_header *self, int fd) { - struct perf_file_header f_header; + struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; @@ -577,8 +614,9 @@ int perf_header__read(struct perf_header *self, int fd) struct perf_header_attr *attr; off_t tmp; - if (do_read(fd, &f_attr, sizeof(f_attr))) + if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr))) goto out_errno; + tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); @@ -589,7 +627,7 @@ int perf_header__read(struct perf_header *self, int fd) lseek(fd, f_attr.ids.offset, SEEK_SET); for (j = 0; j < nr_ids; j++) { - if (do_read(fd, &f_id, sizeof(f_id))) + if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id))) goto out_errno; if (perf_header_attr__add_id(attr, f_id) < 0) { @@ -610,7 +648,8 @@ int perf_header__read(struct perf_header *self, int fd) events = malloc(f_header.event_types.size); if (events == NULL) return -ENOMEM; - if (do_read(fd, events, 
f_header.event_types.size)) + if (perf_header__getbuffer64(self, fd, events, + f_header.event_types.size)) goto out_errno; event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2b69aab67e35..ccc8540feccd 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -52,6 +52,7 @@ struct perf_header { u64 data_size; u64 event_offset; u64 event_size; + bool needs_swap; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; @@ -80,6 +81,7 @@ bool perf_header__has_feat(const struct perf_header *self, int feat); int perf_header__process_sections(struct perf_header *self, int fd, int (*process)(struct perf_file_section *self, + struct perf_header *ph, int feat, int fd)); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e3ccdb46d6c4..604e14f6a6f9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ #include +#include #include #include @@ -201,21 +202,88 @@ void event__print_totals(void) event__name[i], event__total[i]); } +void mem_bswap_64(void *src, int byte_size) +{ + u64 *m = src; + + while (byte_size > 0) { + *m = bswap_64(*m); + byte_size -= sizeof(u64); + ++m; + } +} + +static void event__all64_swap(event_t *self) +{ + struct perf_event_header *hdr = &self->header; + mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr)); +} + +static void event__comm_swap(event_t *self) +{ + self->comm.pid = bswap_32(self->comm.pid); + self->comm.tid = bswap_32(self->comm.tid); +} + +static void event__mmap_swap(event_t *self) +{ + self->mmap.pid = bswap_32(self->mmap.pid); + self->mmap.tid = bswap_32(self->mmap.tid); + self->mmap.start = bswap_64(self->mmap.start); + self->mmap.len = bswap_64(self->mmap.len); + self->mmap.pgoff = bswap_64(self->mmap.pgoff); +} + +static void event__task_swap(event_t *self) +{ + self->fork.pid = bswap_32(self->fork.pid); + self->fork.tid = 
bswap_32(self->fork.tid); + self->fork.ppid = bswap_32(self->fork.ppid); + self->fork.ptid = bswap_32(self->fork.ptid); + self->fork.time = bswap_64(self->fork.time); +} + +static void event__read_swap(event_t *self) +{ + self->read.pid = bswap_32(self->read.pid); + self->read.tid = bswap_32(self->read.tid); + self->read.value = bswap_64(self->read.value); + self->read.time_enabled = bswap_64(self->read.time_enabled); + self->read.time_running = bswap_64(self->read.time_running); + self->read.id = bswap_64(self->read.id); +} + +typedef void (*event__swap_op)(event_t *self); + +static event__swap_op event__swap_ops[] = { + [PERF_RECORD_MMAP] = event__mmap_swap, + [PERF_RECORD_COMM] = event__comm_swap, + [PERF_RECORD_FORK] = event__task_swap, + [PERF_RECORD_EXIT] = event__task_swap, + [PERF_RECORD_LOST] = event__all64_swap, + [PERF_RECORD_READ] = event__read_swap, + [PERF_RECORD_SAMPLE] = event__all64_swap, + [PERF_RECORD_MAX] = NULL, +}; + static int perf_session__process_event(struct perf_session *self, event_t *event, struct perf_event_ops *ops, - unsigned long offset, unsigned long head) + u64 offset, u64 head) { trace_event(event); if (event->header.type < PERF_RECORD_MAX) { - dump_printf("%#lx [%#x]: PERF_RECORD_%s", + dump_printf("%#Lx [%#x]: PERF_RECORD_%s", offset + head, event->header.size, event__name[event->header.type]); ++event__total[0]; ++event__total[event->header.type]; } + if (self->header.needs_swap && event__swap_ops[event->header.type]) + event__swap_ops[event->header.type](event); + switch (event->header.type) { case PERF_RECORD_SAMPLE: return ops->sample(event, self); @@ -241,7 +309,15 @@ static int perf_session__process_event(struct perf_session *self, } } -int perf_header__read_build_ids(int input, u64 offset, u64 size) +void perf_event_header__bswap(struct perf_event_header *self) +{ + self->type = bswap_32(self->type); + self->misc = bswap_16(self->misc); + self->size = bswap_16(self->size); +} + +int perf_header__read_build_ids(struct 
perf_header *self, + int input, u64 offset, u64 size) { struct build_id_event bev; char filename[PATH_MAX]; @@ -256,6 +332,9 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size) if (read(input, &bev, sizeof(bev)) != sizeof(bev)) goto out; + if (self->needs_swap) + perf_event_header__bswap(&bev.header); + len = bev.header.size - sizeof(bev); if (read(input, filename, len) != len) goto out; @@ -292,9 +371,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se int perf_session__process_events(struct perf_session *self, struct perf_event_ops *ops) { - int err; - unsigned long head, shift; - unsigned long offset = 0; + int err, mmap_prot, mmap_flags; + u64 head, shift; + u64 offset = 0; size_t page_size; event_t *event; uint32_t size; @@ -330,9 +409,16 @@ out_getcwd_err: offset += shift; head -= shift; + mmap_prot = PROT_READ; + mmap_flags = MAP_SHARED; + + if (self->header.needs_swap) { + mmap_prot |= PROT_WRITE; + mmap_flags = MAP_PRIVATE; + } remap: - buf = mmap(NULL, page_size * self->mmap_window, PROT_READ, - MAP_SHARED, self->fd, offset); + buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, + mmap_flags, self->fd, offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); err = -errno; @@ -342,6 +428,8 @@ remap: more: event = (event_t *)(buf + head); + if (self->header.needs_swap) + perf_event_header__bswap(&event->header); size = event->header.size; if (size == 0) size = 8; @@ -361,12 +449,12 @@ more: size = event->header.size; - dump_printf("\n%#lx [%#x]: event: %d\n", + dump_printf("\n%#Lx [%#x]: event: %d\n", offset + head, event->header.size, event->header.type); if (size == 0 || perf_session__process_event(self, event, ops, offset, head) < 0) { - dump_printf("%#lx [%#x]: skipping unknown header type: %d\n", + dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", offset + head, event->header.size, event->header.type); /* diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h 
index d4a9d20f8d44..36d1a80c0b6c 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -51,6 +51,8 @@ struct perf_event_ops { struct perf_session *perf_session__new(const char *filename, int mode, bool force); void perf_session__delete(struct perf_session *self); +void perf_event_header__bswap(struct perf_event_header *self); + int perf_session__process_events(struct perf_session *self, struct perf_event_ops *event_ops); @@ -61,7 +63,8 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self, bool perf_session__has_traces(struct perf_session *self, const char *msg); -int perf_header__read_build_ids(int input, u64 offset, u64 file_size); +int perf_header__read_build_ids(struct perf_header *self, int input, + u64 offset, u64 file_size); int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, const char *symbol_name, @@ -69,4 +72,6 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, void perf_session__reloc_vmlinux_maps(struct perf_session *self, u64 unrelocated_addr); +void mem_bswap_64(void *src, int byte_size); + #endif /* __PERF_SESSION_H */ From 1b75962e92d48a41019d4b440e221638aa2a7238 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 18:30:04 -0200 Subject: [PATCH 062/640] perf tools: Convert getpagesize() uses to sysconf(_SC_PAGESIZE) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the more portable and equivalent sysconf call.
Reported-by: Aristeu Rozanski Reported-by: Ulrich Drepper Signed-off-by: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ulrich Drepper LKML-Reference: <1263501006-14185-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/session.c | 2 +- tools/perf/util/trace-event-info.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 604e14f6a6f9..1951e330377c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -384,7 +384,7 @@ int perf_session__process_events(struct perf_session *self, perf_event_ops__fill_defaults(ops); - page_size = getpagesize(); + page_size = sysconf(_SC_PAGESIZE); head = self->header.data_offset; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 407fd65b6cdb..5ea8973ad331 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -515,7 +515,7 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) write_or_die(buf, 1); /* save page_size */ - page_size = getpagesize(); + page_size = sysconf(_SC_PAGESIZE); write_or_die(&page_size, 4); read_header_files(); From 8d0591f6ad9edf66697ce29de176fb6f3213b9e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 18:30:05 -0200 Subject: [PATCH 063/640] perf symbols: Don't try to load kallsyms if it doesn't match the record build-id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now a perf.data file collected on an x86_64 Fedora 12 machine gets properly parsed on a Debian testing PARISC64 machine with 32-bit userland: acme@parisc:~/git/linux-2.6-tip$ perf report 2> /dev/null | head -15 # Samples: 293085637 # # Overhead Command Shared Object Symbol # ........ ............... ................................. ......
# 35.11% find [kernel.kallsyms] [k] 0xffffffff81002b5a 18.25% perf [kernel.kallsyms] [k] 0xffffffff8102235f 9.07% find find [.] 0x0000000000fb0e 5.80% swapper [kernel.kallsyms] [k] 0xffffffff8102235f 3.29% perf libc-2.10.2.so [.] __GI_strcmp 2.70% find libc-2.10.2.so [.] __GI_memmove 2.33% init [kernel.kallsyms] [k] 0xffffffff810091b9 2.03% find libc-2.10.2.so [.] _int_malloc 1.67% find libc-2.10.2.so [.] _IO_vfprintf_internal 1.65% sshd libcrypto.so.0.9.8k [.] 0x00000000105440 acme@parisc:~/git/linux-2.6-tip$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263501006-14185-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 381999dd5c1f..71d23e1e30e8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1608,11 +1608,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, u8 kallsyms_build_id[BUILD_ID_SIZE]; if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, - sizeof(kallsyms_build_id)) == 0) - - is_kallsyms = dso__build_id_equal(self, kallsyms_build_id); - if (is_kallsyms) - goto do_kallsyms; + sizeof(kallsyms_build_id)) == 0) { + is_kallsyms = dso__build_id_equal(self, kallsyms_build_id); + if (is_kallsyms) + goto do_kallsyms; + } goto do_vmlinux; } @@ -1623,6 +1623,9 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, do_vmlinux: err = dso__load_vmlinux(self, map, session, self->long_name, filter); if (err <= 0) { + if (self->has_build_id) + return -1; + pr_info("The file %s cannot be used, " "trying to use /proc/kallsyms...", self->long_name); do_kallsyms: From 9e201442de7c954f03710ac76f28c1927d07550c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 18:30:06 -0200 Subject: [PATCH 064/640] perf 
symbols: Cache /proc/kallsyms files by build-id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that when we don't have a vmlinux handy we can store the kallsyms for later use by 'perf report'. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263501006-14185-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 2 +- tools/perf/util/header.c | 15 +++++++++---- tools/perf/util/symbol.c | 48 +++++++++++++++++++++++++++++----------- tools/perf/util/symbol.h | 5 +++-- tools/perf/util/util.c | 30 +++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 24ec5be4a1c0..0e9820ac4f5e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -245,7 +245,7 @@ int event__synthesize_kernel_mmap(event__handler_t process, */ struct process_symbol_args args = { .name = symbol_name, }; - if (kallsyms__parse(&args, find_symbol_cb) <= 0) + if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0) return -ENOENT; size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b31e0ae4b8db..1b65fed0dd2d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -237,11 +237,13 @@ static int dso__cache_build_id(struct dso *self, const char *debugdir) char *filename = malloc(size), *linkname = malloc(size), *targetname, *sbuild_id; int len, err = -1; + bool is_kallsyms = self->kernel && self->long_name[0] != '/'; if (filename == NULL || linkname == NULL) goto out_free; - len = snprintf(filename, size, "%s%s", debugdir, self->long_name); + len = snprintf(filename, size, "%s%s%s", + debugdir, is_kallsyms ? 
"/" : "", self->long_name); if (mkdir_p(filename, 0755)) goto out_free; @@ -249,9 +251,14 @@ static int dso__cache_build_id(struct dso *self, const char *debugdir) sbuild_id = filename + len; build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); - if (access(filename, F_OK) && link(self->long_name, filename) && - copyfile(self->long_name, filename)) - goto out_free; + if (access(filename, F_OK)) { + if (is_kallsyms) { + if (copyfile("/proc/kallsyms", filename)) + goto out_free; + } else if (link(self->long_name, filename) && + copyfile(self->long_name, filename)) + goto out_free; + } len = snprintf(linkname, size, "%s/.build-id/%.2s", debugdir, sbuild_id); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 71d23e1e30e8..ae61e9f4d6eb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -383,13 +383,14 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) return ret; } -int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name, +int kallsyms__parse(const char *filename, void *arg, + int (*process_symbol)(void *arg, const char *name, char type, u64 start)) { char *line = NULL; size_t n; int err = 0; - FILE *file = fopen("/proc/kallsyms", "r"); + FILE *file = fopen(filename, "r"); if (file == NULL) goto out_failure; @@ -466,10 +467,11 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * so that we can in the next step set the symbol ->end address and then * call kernel_maps__split_kallsyms. 
*/ -static int dso__load_all_kallsyms(struct dso *self, struct map *map) +static int dso__load_all_kallsyms(struct dso *self, const char *filename, + struct map *map) { struct process_kallsyms_args args = { .map = map, .dso = self, }; - return kallsyms__parse(&args, map__process_kallsym_symbol); + return kallsyms__parse(filename, &args, map__process_kallsym_symbol); } /* @@ -556,10 +558,10 @@ discard_symbol: rb_erase(&pos->rb_node, root); } -static int dso__load_kallsyms(struct dso *self, struct map *map, +static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map, struct perf_session *session, symbol_filter_t filter) { - if (dso__load_all_kallsyms(self, map) < 0) + if (dso__load_all_kallsyms(self, filename, map) < 0) return -1; symbols__fixup_end(&self->symbols[map->type]); @@ -1580,7 +1582,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, struct perf_session *session, symbol_filter_t filter) { int err; - bool is_kallsyms; + const char *kallsyms_filename = NULL; + char *kallsyms_allocated_filename = NULL; if (vmlinux_path != NULL) { int i; @@ -1606,19 +1609,37 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, */ if (self->has_build_id) { u8 kallsyms_build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, sizeof(kallsyms_build_id)) == 0) { - is_kallsyms = dso__build_id_equal(self, kallsyms_build_id); - if (is_kallsyms) + if (dso__build_id_equal(self, kallsyms_build_id)) { + kallsyms_filename = "/proc/kallsyms"; goto do_kallsyms; + } } + + build_id__sprintf(self->build_id, sizeof(self->build_id), + sbuild_id); + + if (asprintf(&kallsyms_allocated_filename, + "%s/.debug/[kernel.kallsyms]/%s", + getenv("HOME"), sbuild_id) != -1) { + if (access(kallsyms_filename, F_OK)) { + kallsyms_filename = kallsyms_allocated_filename; + goto do_kallsyms; + } + free(kallsyms_allocated_filename); + kallsyms_allocated_filename = NULL; + } + 
goto do_vmlinux; } - is_kallsyms = self->long_name[0] == '['; - if (is_kallsyms) + if (self->long_name[0] == '[') { + kallsyms_filename = "/proc/kallsyms"; goto do_kallsyms; + } do_vmlinux: err = dso__load_vmlinux(self, map, session, self->long_name, filter); @@ -1629,9 +1650,10 @@ do_vmlinux: pr_info("The file %s cannot be used, " "trying to use /proc/kallsyms...", self->long_name); do_kallsyms: - err = dso__load_kallsyms(self, map, session, filter); - if (err > 0 && !is_kallsyms) + err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter); + if (err > 0 && kallsyms_filename == NULL) dso__set_long_name(self, strdup("[kernel.kallsyms]")); + free(kallsyms_allocated_filename); } if (err > 0) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 594156e43b10..36b7c717f5ee 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -144,8 +144,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); -int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start)); +int kallsyms__parse(const char *filename, void *arg, + int (*process_symbol)(void *arg, const char *name, + char type, u64 start)); int symbol__init(void); bool symbol_type__is_a(char symbol_type, enum map_type map_type); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index f3c0798a5e78..f0685849b244 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -32,6 +32,33 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? 
-1 : 0; } +static int slow_copyfile(const char *from, const char *to) +{ + int err = 0; + char *line = NULL; + size_t n; + FILE *from_fp = fopen(from, "r"), *to_fp; + + if (from_fp == NULL) + goto out; + + to_fp = fopen(to, "w"); + if (to_fp == NULL) + goto out_fclose_from; + + while (getline(&line, &n, from_fp) > 0) + if (fputs(line, to_fp) == EOF) + goto out_fclose_to; + err = 0; +out_fclose_to: + fclose(to_fp); + free(line); +out_fclose_from: + fclose(from_fp); +out: + return err; +} + int copyfile(const char *from, const char *to) { int fromfd, tofd; @@ -42,6 +69,9 @@ int copyfile(const char *from, const char *to) if (stat(from, &st)) goto out; + if (st.st_size == 0) /* /proc? do it slowly... */ + return slow_copyfile(from, to); + fromfd = open(from, O_RDONLY); if (fromfd < 0) goto out; From cf4e5b0838e822dd404638ad00d35b63fffe8191 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 23:45:27 -0200 Subject: [PATCH 065/640] perf symbols: Use dso->long_name in dsos__find() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If not we end up duplicating the module DSOs because first we insert them using the short name found in /proc/modules, then, when processing synthesized MMAP events we add them again. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263519930-22803-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ae61e9f4d6eb..4267138c7bbe 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1679,7 +1679,7 @@ static struct dso *dsos__find(struct list_head *head, const char *name) struct dso *pos; list_for_each_entry(pos, head, node) - if (strcmp(pos->name, name) == 0) + if (strcmp(pos->long_name, name) == 0) return pos; return NULL; } From 18c3daa4961b9fa1f2db0711d93c0acf0c39fd12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 23:45:28 -0200 Subject: [PATCH 066/640] perf record: Encode the domain while synthesizing MMAP events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past 'perf record' had to process only userspace MMAP events, the ones generated in the kernel, but after we reused the MMAP events to encode the module mappings we ended up adding them first to the list of userspace DSOs (dsos__user) and to the kernel one (dsos__kernel). Fix this by encoding the header.misc field and then using it, like other parts to decide the right DSOs list to insert/find. The gotcha here is that since the kernel puts zero in .misc, which isn't PERF_RECORD_MISC_KERNEL (1 << 1), to differentiate, we put 1 in .misc.
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263519930-22803-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 8 ++++++-- tools/perf/util/event.c | 11 +++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c130df2676f1..614fa9a4c67c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -117,8 +117,12 @@ static void write_event(event_t *buf, size_t size) * Add it to the list of DSOs, so that when we finish this * record session we can pick the available build-ids. */ - if (buf->header.type == PERF_RECORD_MMAP) - dsos__findnew(buf->mmap.filename); + if (buf->header.type == PERF_RECORD_MMAP) { + struct list_head *head = &dsos__user; + if (buf->mmap.header.misc == 1) + head = &dsos__kernel; + __dsos__findnew(head, buf->mmap.filename); + } write_output(buf, size); } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0e9820ac4f5e..1abaefc126a8 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -110,7 +110,10 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, while (1) { char bf[BUFSIZ], *pbf = bf; event_t ev = { - .header = { .type = PERF_RECORD_MMAP }, + .header = { + .type = PERF_RECORD_MMAP, + .misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */ + }, }; int n; size_t size; @@ -170,6 +173,7 @@ int event__synthesize_modules(event__handler_t process, size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); memset(&ev, 0, sizeof(ev)); + ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */ ev.mmap.header.type = PERF_RECORD_MMAP; ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size)); @@ -236,7 +240,10 @@ int event__synthesize_kernel_mmap(event__handler_t process, { size_t 
size; event_t ev = { - .header = { .type = PERF_RECORD_MMAP }, + .header = { + .type = PERF_RECORD_MMAP, + .misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */ + }, }; /* * We should get this from /sys/kernel/sections/.text, but till that is From 59ee68ecd1561a233fb6ad351980bea8402533e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 23:45:29 -0200 Subject: [PATCH 067/640] perf symbols: Create thread__find_addr_map from thread__find_addr_location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because some tools will only want to know which maps had hits, not needing the full symbol resolution done by thread__find_addr_location. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263519930-22803-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 26 +++++++++++++++++--------- tools/perf/util/thread.h | 5 +++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1abaefc126a8..5a6e827a09eb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -422,11 +422,10 @@ int event__process_task(event_t *self, struct perf_session *session) return 0; } -void thread__find_addr_location(struct thread *self, - struct perf_session *session, u8 cpumode, - enum map_type type, u64 addr, - struct addr_location *al, - symbol_filter_t filter) +void thread__find_addr_map(struct thread *self, + struct perf_session *session, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al) { struct map_groups *mg = &self->mg; @@ -441,7 +440,6 @@ void thread__find_addr_location(struct thread *self, else { al->level = 'H'; al->map = NULL; - al->sym = NULL; return; } try_again: @@ -460,11 +458,21 @@ try_again: mg = &session->kmaps; goto try_again; } - al->sym = NULL; -
} else { + } else al->addr = al->map->map_ip(al->map, al->addr); +} + +void thread__find_addr_location(struct thread *self, + struct perf_session *session, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al, + symbol_filter_t filter) +{ + thread__find_addr_map(self, session, cpumode, type, addr, al); + if (al->map != NULL) al->sym = map__find_symbol(al->map, session, al->addr, filter); - } + else + al->sym = NULL; } static void dso__calc_col_width(struct dso *self) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index c06c13535a70..e35653c1817c 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -48,6 +48,11 @@ static inline struct map *thread__find_map(struct thread *self, return self ? map_groups__find(&self->mg, type, addr) : NULL; } +void thread__find_addr_map(struct thread *self, + struct perf_session *session, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al); + void thread__find_addr_location(struct thread *self, struct perf_session *session, u8 cpumode, enum map_type type, u64 addr, From 88d3d9b7c843a42cb73c55a2d13cd1041da31fb9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2010 23:45:30 -0200 Subject: [PATCH 068/640] perf buildid-list: Introduce --with-hits option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using this option 'perf buildid-list' will process all samples, marking the DSOs that had some hits to list just them. This in turn will be used by a new porcelain, 'perf archive', that will be just a shell script to create a tarball from the 'perf buildid-list --with-hits' output and the files cached by 'perf record' in ~/.debug. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263519930-22803-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-buildid-list.c | 35 ++++++++++++++++++++++++++++++- tools/perf/util/symbol.c | 11 ++++++---- tools/perf/util/symbol.h | 3 ++- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 4229c2c213cc..431f204bde64 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,6 +16,7 @@ static char const *input_name = "perf.data"; static int force; +static bool with_hits; static const char * const buildid_list_usage[] = { "perf buildid-list []", @@ -23,6 +24,7 @@ static const char * const buildid_list_usage[] = { }; static const struct option options[] = { + OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"), OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -31,6 +33,34 @@ static const struct option options[] = { OPT_END() }; +static int build_id_list__process_event(event_t *event, + struct perf_session *session) +{ + struct addr_location al; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread = perf_session__findnew(session, event->ip.pid); + + if (thread == NULL) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, + event->ip.ip, &al); + + if (al.map != NULL) + al.map->dso->hit = 1; + + return 0; +} + +static struct perf_event_ops build_id_list__event_ops = { + .sample = build_id_list__process_event, + .mmap = event__process_mmap, + .fork = event__process_task, +}; + static int __cmd_buildid_list(void) { int err = -1; @@ -40,7 +70,10 @@ static int __cmd_buildid_list(void) 
if (session == NULL) return -1; - dsos__fprintf_buildid(stdout); + if (with_hits) + perf_session__process_events(session, &build_id_list__event_ops); + + dsos__fprintf_buildid(stdout, with_hits); perf_session__delete(session); return err; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4267138c7bbe..a4e745934584 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1716,22 +1716,25 @@ void dsos__fprintf(FILE *fp) __dsos__fprintf(&dsos__user, fp); } -static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp) +static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, + bool with_hits) { struct dso *pos; size_t ret = 0; list_for_each_entry(pos, head, node) { + if (with_hits && !pos->hit) + continue; ret += dso__fprintf_buildid(pos, fp); ret += fprintf(fp, " %s\n", pos->long_name); } return ret; } -size_t dsos__fprintf_buildid(FILE *fp) +size_t dsos__fprintf_buildid(FILE *fp, bool with_hits) { - return (__dsos__fprintf_buildid(&dsos__kernel, fp) + - __dsos__fprintf_buildid(&dsos__user, fp)); + return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) + + __dsos__fprintf_buildid(&dsos__user, fp, with_hits)); } static struct dso *dsos__create_kernel(const char *vmlinux) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 36b7c717f5ee..525085fd0735 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -97,6 +97,7 @@ struct dso { u8 slen_calculated:1; u8 has_build_id:1; u8 kernel:1; + u8 hit:1; unsigned char origin; u8 sorted_by_name; u8 loaded; @@ -129,7 +130,7 @@ struct perf_session; int dso__load(struct dso *self, struct map *map, struct perf_session *session, symbol_filter_t filter); void dsos__fprintf(FILE *fp); -size_t dsos__fprintf_buildid(FILE *fp); +size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); From 
460848fceffc91652b2d36d19db4ac40d12fb607 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jan 2010 13:17:51 -0200 Subject: [PATCH 069/640] perf symbols: The synthesized kernel modules MMAP must use the pathnames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we use ->long_name in dsos__find now. Now 'perf buildid_list' is not duplicating those and managing to show the proper build-ids for the DSOs with hits: [root@doppio linux-2.6-tip]# perf buildid-list -H 74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms] 9ffdcac0a7935922d1f04b6cc9029dfef0f066ef /lib/modules/2.6.33-rc4-tip+/kernel/arch/x86/crypto/aes-x86_64.ko 3aaf89c32ebfc438ff546c93597d41788e3e65f3 /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwl3945.ko 19f46033f73e1ec612937189bb118c5daba5a0c8 /lib/modules/2.6.33-rc4-tip+/kernel/net/mac80211/mac80211.ko 1772f014a7a7272859655acb0c64a20ab20b75ee /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 /lib64/libc-2.10.2.so 5c68f7afeb33309c78037e374b0deee84dd441f6 /lib64/libpthread-2.10.2.so e9c9ad5c138ef882e4507d2605645b597da43873 /bin/dbus-daemon bcda7d09eb6c9ee380dae0ed3d591d4311decc31 /lib64/libdbus-1.so.3.4.0 7cc449a77f48b85d6088114000e970ced613bed8 /usr/lib64/libcrypto.so.0.9.8k fdd1ccd1ff7917ab020653147ab3bacf0a85b5b9 /lib64/libglib-2.0.so.0.2000.5 e4417ebb8762e5f2eee93c8011a71115ff5edad8 /lib64/libgobject-2.0.so.0.2000.5 931e49461f6df99104f0febcc52f6fed5e2efce6 /usr/sbin/sshd dab5f724c088f89fbd8304da553ed6cb30bbec96 /usr/lib64/libgdk-x11-2.0.so.0.1600.6 f2037a091ef36b591187a858d75e203690ea9409 /usr/sbin/openvpn a8e4f743b40fb1fd8b85e2f9b88d93b661472b8f /bin/find 81120aada06e68b1e85882925a0fc6d7345ef59a /home/acme/bin/perf [root@doppio linux-2.6-tip]# Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: 
<1263568672-30323-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 5a6e827a09eb..966d207a1509 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -345,15 +345,15 @@ int event__process_mmap(event_t *self, struct perf_session *session) map = perf_session__new_module_map(session, self->mmap.start, - short_module_name); + self->mmap.filename); if (map == NULL) goto out_problem; - name = strdup(self->mmap.filename); + name = strdup(short_module_name); if (name == NULL) goto out_problem; - dso__set_long_name(map->dso, name); + map->dso->short_name = name; map->end = map->start + self->mmap.len; } else if (memcmp(self->mmap.filename, kmmap_prefix, sizeof(kmmap_prefix) - 1) == 0) { From 2c5851747bcf751908c02e253cb7582d342b4612 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jan 2010 13:17:52 -0200 Subject: [PATCH 070/640] perf archive: Add helper script to package files needed to do analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It uses 'perf buildid-list --with-hits' to create a tarball with what is needed to have in the destination machine ~/.debug hierarchy to properly decode the perf.data file specified. 
Here is an example where a perf.data file collected on a x86-64 machine running Fedora 12 is used and then the data is packaged, transferred and decoded on a PARISC64 machine running Debian Testing, 32-bit userspace: [root@doppio linux-2.6-tip]# uname -a Linux doppio.ghostprotocols.net 2.6.33-rc4-tip+ #3 SMP Wed Jan 13 11:58:15 BRST 2010 x86_64 x86_64 x86_64 GNU/Linux [root@doppio linux-2.6-tip]# perf archive [root@doppio linux-2.6-tip]# ls -la perf.data* -rw------- 1 root root 737696 2010-01-14 23:36 perf.data -rw-r--r-- 1 root root 8840025 2010-01-15 12:27 perf.data.tar.bz2 [root@doppio linux-2.6-tip]# scp perf.data.* parisc64:. Password: perf.data.tar.bz2 100% 8633KB 1.4MB/s 00:06 [root@doppio linux-2.6-tip]# ssh parisc64 Password: Linux parisc 2.6.19-g2bbf29ac-dirty #1 Sun Dec 3 17:24:04 BRST 2006 parisc64 The programs included with the Debian GNU/Linux system are free software; the exact distribution terms for each program are described in the individual files in /usr/share/doc/*/copyright. Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent permitted by applicable law. 
Last login: Thu Jan 14 11:23:24 2010 from d parisc:~# uname -a Linux parisc 2.6.19-g2bbf29ac-dirty #1 Sun Dec 3 17:24:04 BRST 2006 parisc64 GNU/Linux parisc:~# mkdir .debug parisc:~# tar xvf perf.data.tar.bz2 -C ~/.debug tar: Record size = 8 blocks .build-id/74/f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms]/74f9930ee94475b6b3238caf3725a50d59cb994b .build-id/9f/fdcac0a7935922d1f04b6cc9029dfef0f066ef lib/modules/2.6.33-rc4-tip+/kernel/arch/x86/crypto/aes-x86_64.ko/9ffdcac0a7935922d1f04b6cc9029dfef0f066ef .build-id/3a/af89c32ebfc438ff546c93597d41788e3e65f3 lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwl3945.ko/3aaf89c32ebfc438ff546c93597d41788e3e65f3 .build-id/19/f46033f73e1ec612937189bb118c5daba5a0c8 lib/modules/2.6.33-rc4-tip+/kernel/net/mac80211/mac80211.ko/19f46033f73e1ec612937189bb118c5daba5a0c8 .build-id/17/72f014a7a7272859655acb0c64a20ab20b75ee lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko/1772f014a7a7272859655acb0c64a20ab20b75ee .build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 .build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6 lib64/libpthread-2.10.2.so/5c68f7afeb33309c78037e374b0deee84dd441f6 .build-id/e9/c9ad5c138ef882e4507d2605645b597da43873 bin/dbus-daemon/e9c9ad5c138ef882e4507d2605645b597da43873 .build-id/bc/da7d09eb6c9ee380dae0ed3d591d4311decc31 lib64/libdbus-1.so.3.4.0/bcda7d09eb6c9ee380dae0ed3d591d4311decc31 .build-id/7c/c449a77f48b85d6088114000e970ced613bed8 usr/lib64/libcrypto.so.0.9.8k/7cc449a77f48b85d6088114000e970ced613bed8 .build-id/fd/d1ccd1ff7917ab020653147ab3bacf0a85b5b9 lib64/libglib-2.0.so.0.2000.5/fdd1ccd1ff7917ab020653147ab3bacf0a85b5b9 .build-id/e4/417ebb8762e5f2eee93c8011a71115ff5edad8 lib64/libgobject-2.0.so.0.2000.5/e4417ebb8762e5f2eee93c8011a71115ff5edad8 .build-id/93/1e49461f6df99104f0febcc52f6fed5e2efce6 usr/sbin/sshd/931e49461f6df99104f0febcc52f6fed5e2efce6 .build-id/da/b5f724c088f89fbd8304da553ed6cb30bbec96 
usr/lib64/libgdk-x11-2.0.so.0.1600.6/dab5f724c088f89fbd8304da553ed6cb30bbec96 .build-id/f2/037a091ef36b591187a858d75e203690ea9409 usr/sbin/openvpn/f2037a091ef36b591187a858d75e203690ea9409 .build-id/a8/e4f743b40fb1fd8b85e2f9b88d93b661472b8f bin/find/a8e4f743b40fb1fd8b85e2f9b88d93b661472b8f .build-id/81/120aada06e68b1e85882925a0fc6d7345ef59a home/acme/bin/perf/81120aada06e68b1e85882925a0fc6d7345ef59a parisc:~# perf report 2> /dev/null | head -25 9.07% find find [.] 0x0000000000fb0e 3.29% perf libc-2.10.2.so [.] __GI_strcmp 3.19% find [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 2.70% find libc-2.10.2.so [.] __GI_memmove 2.62% perf [kernel.kallsyms] [k] vsnprintf 2.03% find libc-2.10.2.so [.] _int_malloc 2.02% perf [kernel.kallsyms] [k] format_decode 1.70% find [kernel.kallsyms] [k] n_tty_write 1.70% find [kernel.kallsyms] [k] half_md4_transform 1.67% find libc-2.10.2.so [.] _IO_vfprintf_internal 1.66% perf [kernel.kallsyms] [k] audit_free_aux 1.62% swapper [kernel.kallsyms] [k] mwait_idle_with_hints 1.58% find [kernel.kallsyms] [k] __kmalloc 1.35% find [kernel.kallsyms] [k] sched_clock_local 1.35% find [kernel.kallsyms] [k] ext4_check_dir_entry 1.35% find [kernel.kallsyms] [k] ext4_htree_store_dirent 1.35% find [kernel.kallsyms] [k] sys_write 1.35% find [e1000e] [k] e1000_clean 1.35% find [kernel.kallsyms] [k] _atomic_dec_and_lock 1.34% find [kernel.kallsyms] [k] __d_lookup parisc:~# Probably the next step is to have 'perf report' notice that there is a perf.data.tar.bz2 file in the same directory and look if it was already added to ~/.debug/. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263568672-30323-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 10 ++-------- tools/perf/command-list.txt | 1 + tools/perf/perf-archive.sh | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 tools/perf/perf-archive.sh diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2c03a9411317..d739552036d0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -286,11 +286,7 @@ SCRIPT_PERL = SCRIPT_SH = TEST_PROGRAMS = -# -# No scripts right now: -# - -# SCRIPT_SH += perf-am.sh +SCRIPT_SH += perf-archive.sh # # No Perl scripts right now: @@ -315,9 +311,7 @@ PROGRAMS += perf # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. # -# None right now: -# -# BUILT_INS += perf-init $X +BUILT_INS += perf-archive # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 71dc7c3fe7b2..f73d1d90f5bd 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -3,6 +3,7 @@ # command name category [deprecated] [common] # perf-annotate mainporcelain common +perf-archive mainporcelain perf-bench mainporcelain common perf-buildid-list mainporcelain common perf-diff mainporcelain common diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh new file mode 100644 index 000000000000..45fbe2f07b15 --- /dev/null +++ b/tools/perf/perf-archive.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# perf archive +# Arnaldo Carvalho de Melo + +PERF_DATA=perf.data +if [ $# -ne 0 ] ; then + PERF_DATA=$1 +fi + +DEBUGDIR=~/.debug/ +BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) + +perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS +if [ ! 
-s $BUILDIDS ] ; then + echo "perf archive: no build-ids found" + rm -f $BUILDIDS + exit 1 +fi + +MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX) + +cut -d ' ' -f 1 $BUILDIDS | \ +while read build_id ; do + linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2} + filename=$(readlink -f $linkname) + echo ${linkname#$DEBUGDIR} >> $MANIFEST + echo ${filename#$DEBUGDIR} >> $MANIFEST +done + +tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST +rm -f $MANIFEST $BUILDIDS +exit 0 From f5a2c3dce03621b55f84496f58adc2d1a87ca16f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jan 2010 18:08:26 -0200 Subject: [PATCH 071/640] perf record: Intercept all events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The event interception we need to do in 'perf record' to create a list of all DSOs in PERF_RECORD_MMAP events wasn't seeing all events, make sure that happens by checking size against event_t->header.size. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263586107-1756-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 614fa9a4c67c..7bb9ca1b30fa 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -113,16 +113,24 @@ static void write_output(void *buf, size_t size) static void write_event(event_t *buf, size_t size) { - /* - * Add it to the list of DSOs, so that when we finish this - * record session we can pick the available build-ids.
- */ - if (buf->header.type == PERF_RECORD_MMAP) { - struct list_head *head = &dsos__user; - if (buf->mmap.header.misc == 1) - head = &dsos__kernel; - __dsos__findnew(head, buf->mmap.filename); - } + size_t processed_size = buf->header.size; + event_t *ev = buf; + + do { + /* + * Add it to the list of DSOs, so that when we finish this + * record session we can pick the available build-ids. + */ + if (ev->header.type == PERF_RECORD_MMAP) { + struct list_head *head = &dsos__user; + if (ev->header.misc == 1) + head = &dsos__kernel; + __dsos__findnew(head, ev->mmap.filename); + } + + ev = ((void *)ev) + ev->header.size; + processed_size += ev->header.size; + } while (processed_size < size); write_output(buf, size); } From 881516eb828a3f7276c378bcef96b7788fc99016 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jan 2010 18:08:27 -0200 Subject: [PATCH 072/640] perf symbols: Accept an alias when looking for "_text" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it is in PARISC64: parisc:~# uname -a Linux parisc 2.6.33-rc4-tip+ #1 SMP Thu Jan 14 13:33:34 BRST 2010 parisc64 GNU/Linux parisc:~# grep -w _text /proc/kallsyms 0000000040100000 A _text parisc:~# grep 0000000040100000 /proc/kallsyms 0000000040100000 T stext 0000000040100000 T _stext 0000000040100000 A _text parisc:~# Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263586107-1756-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 966d207a1509..dc13cad828d7 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -227,7 +227,12 @@ static int find_symbol_cb(void *arg, const char *name, char type, u64 start) { struct process_symbol_args *args = arg; - if (!symbol_type__is_a(type, 
MAP__FUNCTION) || strcmp(name, args->name)) + /* + * Must be a function or at least an alias, as in PARISC64, where "_text" is + * an 'A' to the same address as "_stext". + */ + if (!(symbol_type__is_a(type, MAP__FUNCTION) || + type == 'A') || strcmp(name, args->name)) return 0; args->start = start; From 889ff0150661512d79484219612b7e2e024b6c07 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 20:04:47 +0100 Subject: [PATCH 073/640] perf/core: Split context's event group list into pinned and non-pinned lists Split-up struct perf_event_context::group_list into pinned_groups and flexible_groups (non-pinned). This first appears to be useless as it duplicates various loops around the group list handling. But it scales better in the fast-path in perf_sched_in(). We no longer iterate twice through the entire list to separate pinned and non-pinned scheduling. Instead we iterate through two distinct lists. Another desired effect is that it makes it easier to define distinct scheduling rules on both. Changes in v2: - Respectively rename pinned_grp_list and volatile_grp_list into pinned_groups and flexible_groups as per Ingo's suggestion.
- Various cleanups Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 3 +- kernel/perf_event.c | 231 ++++++++++++++++++++++++------------- 2 files changed, 155 insertions(+), 79 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9a1d276db754..cdbc2aa64a0b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,8 @@ struct perf_event_context { */ struct mutex mutex; - struct list_head group_list; + struct list_head pinned_groups; + struct list_head flexible_groups; struct list_head event_list; int nr_events; int nr_active; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 27f69a04541d..c9f8a757649d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event) event->total_time_running = run_end - event->tstamp_running; } +static struct list_head * +ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) +{ + if (event->attr.pinned) + return &ctx->pinned_groups; + else + return &ctx->flexible_groups; +} + /* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. 
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) * add it straight to the context's event list, or to the group * leader's sibling list: */ - if (group_leader == event) - list_add_tail(&event->group_entry, &ctx->group_list); - else { + if (group_leader == event) { + struct list_head *list; + + list = ctx_group_list(event, ctx); + list_add_tail(&event->group_entry, list); + } else { list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; } @@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) * to the context list directly: */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { + struct list_head *list; - list_move_tail(&sibling->group_entry, &ctx->group_list); + list = ctx_group_list(event, ctx); + list_move_tail(&sibling->group_entry, list); sibling->group_leader = sibling; } } @@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, perf_disable(); if (ctx->nr_active) { - list_for_each_entry(event, &ctx->group_list, group_entry) + list_for_each_entry(event, &ctx->pinned_groups, group_entry) + group_sched_out(event, cpuctx, ctx); + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } perf_enable(); @@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
*/ - list_for_each_entry(event, &ctx->group_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF || - !event->attr.pinned) + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + if (event->state <= PERF_EVENT_STATE_OFF) continue; if (event->cpu != -1 && event->cpu != cpu) continue; @@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx, } } - list_for_each_entry(event, &ctx->group_list, group_entry) { - /* - * Ignore events in OFF or ERROR state, and - * ignore pinned events since we did them already. - */ - if (event->state <= PERF_EVENT_STATE_OFF || - event->attr.pinned) + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + /* Ignore events in OFF or ERROR state */ + if (event->state <= PERF_EVENT_STATE_OFF) continue; - /* * Listen to the 'cpu' scheduling filter constraint * of events: @@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx) * Rotate the first entry last (works just fine for group events too): */ perf_disable(); - list_for_each_entry(event, &ctx->group_list, group_entry) { - list_move_tail(&event->group_entry, &ctx->group_list); + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + list_move_tail(&event->group_entry, &ctx->pinned_groups); + break; + } + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + list_move_tail(&event->group_entry, &ctx->flexible_groups); break; } perf_enable(); @@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr) perf_event_task_sched_in(curr); } +static int event_enable_on_exec(struct perf_event *event, + struct perf_event_context *ctx) +{ + if (!event->attr.enable_on_exec) + return 0; + + event->attr.enable_on_exec = 0; + if (event->state >= PERF_EVENT_STATE_INACTIVE) + return 0; + + __perf_event_mark_enabled(event, ctx); + + return 1; +} + /* * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. 
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) struct perf_event *event; unsigned long flags; int enabled = 0; + int ret; local_irq_save(flags); ctx = task->perf_event_ctxp; @@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_lock(&ctx->lock); - list_for_each_entry(event, &ctx->group_list, group_entry) { - if (!event->attr.enable_on_exec) - continue; - event->attr.enable_on_exec = 0; - if (event->state >= PERF_EVENT_STATE_INACTIVE) - continue; - __perf_event_mark_enabled(event, ctx); - enabled = 1; + list_for_each_entry(event, &ctx->pinned_groups, group_entry) { + ret = event_enable_on_exec(event, ctx); + if (ret) + enabled = 1; + } + + list_for_each_entry(event, &ctx->flexible_groups, group_entry) { + ret = event_enable_on_exec(event, ctx); + if (ret) + enabled = 1; } /* @@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx, { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->group_list); + INIT_LIST_HEAD(&ctx->pinned_groups); + INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); ctx->task = task; @@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child) mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); again: - list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, + list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, + group_entry) + __perf_event_exit_task(child_event, child_ctx, child); + + list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, group_entry) __perf_event_exit_task(child_event, child_ctx, child); @@ -5041,7 +5080,8 @@ again: * its siblings to the list, but we obtained 'tmp' before that which * will still point to the list head terminating the iteration. 
*/ - if (!list_empty(&child_ctx->group_list)) + if (!list_empty(&child_ctx->pinned_groups) || + !list_empty(&child_ctx->flexible_groups)) goto again; mutex_unlock(&child_ctx->mutex); @@ -5049,6 +5089,24 @@ again: put_ctx(child_ctx); } +static void perf_free_event(struct perf_event *event, + struct perf_event_context *ctx) +{ + struct perf_event *parent = event->parent; + + if (WARN_ON_ONCE(!parent)) + return; + + mutex_lock(&parent->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent->child_mutex); + + fput(parent->filp); + + list_del_event(event, ctx); + free_event(event); +} + /* * free an unexposed, unused context as created by inheritance by * init_task below, used by fork() in case of fail. @@ -5063,23 +5121,15 @@ void perf_event_free_task(struct task_struct *task) mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { - struct perf_event *parent = event->parent; + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) + perf_free_event(event, ctx); - if (WARN_ON_ONCE(!parent)) - continue; + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - mutex_lock(&parent->child_mutex); - list_del_init(&event->child_list); - mutex_unlock(&parent->child_mutex); - - fput(parent->filp); - - list_del_event(event, ctx); - free_event(event); - } - - if (!list_empty(&ctx->group_list)) + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) goto again; mutex_unlock(&ctx->mutex); @@ -5087,12 +5137,54 @@ again: put_ctx(ctx); } +static int +inherit_task_group(struct perf_event *event, struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + int *inherited_all) +{ + int ret; + struct perf_event_context *child_ctx = child->perf_event_ctxp; + + if (!event->attr.inherit) { + *inherited_all = 0; + return 0; + } + + if (!child_ctx) { + /* + * This is executed from the parent task 
context, so + * inherit events that have been marked for cloning. + * First allocate and initialize a context for the + * child. + */ + + child_ctx = kzalloc(sizeof(struct perf_event_context), + GFP_KERNEL); + if (!child_ctx) + return -ENOMEM; + + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; + get_task_struct(child); + } + + ret = inherit_group(event, parent, parent_ctx, + child, child_ctx); + + if (ret) + *inherited_all = 0; + + return ret; +} + + /* * Initialize the perf_event context in task_struct */ int perf_event_init_task(struct task_struct *child) { - struct perf_event_context *child_ctx = NULL, *parent_ctx; + struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; struct perf_event *event; struct task_struct *parent = current; @@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child) * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ - list_for_each_entry(event, &parent_ctx->group_list, group_entry) { - - if (!event->attr.inherit) { - inherited_all = 0; - continue; - } - - if (!child->perf_event_ctxp) { - /* - * This is executed from the parent task context, so - * inherit events that have been marked for cloning. - * First allocate and initialize a context for the - * child. 
- */ - - child_ctx = kzalloc(sizeof(struct perf_event_context), - GFP_KERNEL); - if (!child_ctx) { - ret = -ENOMEM; - break; - } - - __perf_event_init_context(child_ctx, child); - child->perf_event_ctxp = child_ctx; - get_task_struct(child); - } - - ret = inherit_group(event, parent, parent_ctx, - child, child_ctx); - if (ret) { - inherited_all = 0; + list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { + ret = inherit_task_group(event, parent, parent_ctx, child, + &inherited_all); + if (ret) break; - } } + list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { + ret = inherit_task_group(event, parent, parent_ctx, child, + &inherited_all); + if (ret) + break; + } + + child_ctx = child->perf_event_ctxp; + if (child_ctx && inherited_all) { /* * Mark the child context as a clone of the parent @@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info) struct perf_event_context *ctx = &cpuctx->ctx; struct perf_event *event, *tmp; - list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) + __perf_event_remove_from_context(event); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } static void perf_event_exit_cpu(int cpu) From 5908cdc85eb30f8d07f2cb11d4a62334d7229048 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 20:53:14 +0100 Subject: [PATCH 074/640] list: Introduce list_rotate_left() Bring a new list_rotate_left() helper that rotates a list to the left. This is useful for code that needs to round-robin elements whose queue priority increases from tail to head. 
Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/list.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/list.h b/include/linux/list.h index 969f6e92d089..5d9c6558e8ab 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -205,6 +205,20 @@ static inline int list_empty_careful(const struct list_head *head) return (next == head) && (next == head->prev); } +/** + * list_rotate_left - rotate the list to the left + * @head: the head of the list + */ +static inline void list_rotate_left(struct list_head *head) +{ + struct list_head *first; + + if (!list_empty(head)) { + first = head->next; + list_move_tail(first, head); + } +} + /** * list_is_singular - tests whether a list has just one entry. * @head: the list to test. From e286417378b4f9ce6e473b556193465ab22e12ab Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 21:05:28 +0100 Subject: [PATCH 075/640] perf: Round robin flexible groups of events using list_rotate_left() This is more proper than doing it through a list_for_each_entry() that breaks after the first entry. v2: Don't rotate pinned groups as it's not needed to time share them. 
Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- kernel/perf_event.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c9f8a757649d..bbebe2832639 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1454,25 +1454,16 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ static void rotate_ctx(struct perf_event_context *ctx) { - struct perf_event *event; - if (!ctx->nr_events) return; raw_spin_lock(&ctx->lock); - /* - * Rotate the first entry last (works just fine for group events too): - */ - perf_disable(); - list_for_each_entry(event, &ctx->pinned_groups, group_entry) { - list_move_tail(&event->group_entry, &ctx->pinned_groups); - break; - } - list_for_each_entry(event, &ctx->flexible_groups, group_entry) { - list_move_tail(&event->group_entry, &ctx->flexible_groups); - break; - } + /* Rotate the first entry last of non-pinned groups */ + perf_disable(); + + list_rotate_left(&ctx->flexible_groups); + perf_enable(); raw_spin_unlock(&ctx->lock); From d6f962b57bfaab62891c7abbf1469212a56d6103 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 10 Jan 2010 01:25:51 +0100 Subject: [PATCH 076/640] perf: Export software-only event group characteristic as a flag Before scheduling an event group, we first check if a group can go on. We first check if the group is made of software only events first, in which case it is enough to know if the group can be scheduled in. For that purpose, we iterate through the whole group, which is wasteful as we could do this check when we add/delete an event to a group. So we create a group_flags field in perf event that can host characteristics from a group of events, starting with a first PERF_GROUP_SOFTWARE flag that reduces the check on the fast path. 
Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 5 +++++ kernel/perf_event.c | 30 +++++++++++------------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cdbc2aa64a0b..c6f812e4d058 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct perf_sample_data *, struct pt_regs *regs); +enum perf_group_flag { + PERF_GROUP_SOFTWARE = 0x1, +}; + /** * struct perf_event - performance event kernel representation: */ @@ -574,6 +578,7 @@ struct perf_event { struct list_head event_entry; struct list_head sibling_list; int nr_siblings; + int group_flags; struct perf_event *group_leader; struct perf_event *output; const struct pmu *pmu; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index bbebe2832639..eae6ff693604 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -315,9 +315,16 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (group_leader == event) { struct list_head *list; + if (is_software_event(event)) + event->group_flags |= PERF_GROUP_SOFTWARE; + list = ctx_group_list(event, ctx); list_add_tail(&event->group_entry, list); } else { + if (group_leader->group_flags & PERF_GROUP_SOFTWARE && + !is_software_event(event)) + group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; + list_add_tail(&event->group_entry, &group_leader->sibling_list); group_leader->nr_siblings++; } @@ -372,6 +379,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) list = ctx_group_list(event, ctx); list_move_tail(&sibling->group_entry, list); sibling->group_leader = sibling; + + /* Inherit group flags from the previous leader */ + sibling->group_flags = event->group_flags; } } @@ -699,24 +709,6 @@ group_error: return -EAGAIN; } -/* - * 
Return 1 for a group consisting entirely of software events, - * 0 if the group contains any hardware events. - */ -static int is_software_only_group(struct perf_event *leader) -{ - struct perf_event *event; - - if (!is_software_event(leader)) - return 0; - - list_for_each_entry(event, &leader->sibling_list, group_entry) - if (!is_software_event(event)) - return 0; - - return 1; -} - /* * Work out whether we can put this event group on the CPU now. */ @@ -727,7 +719,7 @@ static int group_can_go_on(struct perf_event *event, /* * Groups consisting entirely of software events can always go on. */ - if (is_software_only_group(event)) + if (event->group_flags & PERF_GROUP_SOFTWARE) return 1; /* * If an exclusive group is already on, no other hardware From 69e3f52d1b1a3ed4390bb8a09bb1324265af7fbf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Jan 2010 14:21:15 +0100 Subject: [PATCH 077/640] perf: Fix implicit declaration of getline in util.c getline() is considered as undeclared in util/util.c because it includes string.h, that in turn includes stdio.h, without having defined _GNU_SOURCE. But util.c also includes util.h that handles the _GNU_SOURCE and all the needed inclusions already. 
Let's include only util.h and sys/mman.h which is the only one header not handled by util.h This fixes the following build error: util/util.c: In function 'slow_copyfile': util/util.c:49: erreur: implicit declaration of function 'getline' util/util.c:49: erreur: nested extern declaration of 'getline' Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1263648075-3858-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/util.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index f0685849b244..f9b890fde681 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,10 +1,5 @@ -#include -#include -#include -#include -#include -#include #include "util.h" +#include int mkdir_p(char *path, mode_t mode) { From 0eda7385db1f30271ade830a231006938a76fb53 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 16 Jan 2010 21:31:16 +0900 Subject: [PATCH 078/640] perf probe: Fix build error of builtin-probe.c I got this build error when building tip tree: | cc1: warnings being treated as errors | builtin-probe.c:123: error: 'opt_show_lines' defined but not used This error is caused by: | #ifndef NO_LIBDWARF | OPT_CALLBACK('L', "line", NULL, | "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]", | "Show source code lines.", opt_show_lines), | #endif My environment defines NO_LIBDWARF, so gcc treated opt_show_lines() as garbage. So I moved opt_show_lines() into #ifndef NO_LIBDWARF ... #endif block. 
Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Mike Galbraith LKML-Reference: <1263645076-9993-1-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- tools/perf/builtin-probe.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1d3a99ea5ce1..34f2acb1cc88 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -118,15 +118,6 @@ static int opt_del_probe_event(const struct option *opt __used, return 0; } -static int opt_show_lines(const struct option *opt __used, - const char *str, int unset __used) -{ - if (str) - parse_line_range_desc(str, &session.line_range); - INIT_LIST_HEAD(&session.line_range.line_list); - session.show_lines = true; - return 0; -} /* Currently just checking function name from symbol map */ static void evaluate_probe_point(struct probe_point *pp) { @@ -148,6 +139,16 @@ static int open_vmlinux(void) pr_debug("Try to open %s\n", session.kmap->dso->long_name); return open(session.kmap->dso->long_name, O_RDONLY); } + +static int opt_show_lines(const struct option *opt __used, + const char *str, int unset __used) +{ + if (str) + parse_line_range_desc(str, &session.line_range); + INIT_LIST_HEAD(&session.line_range.line_list); + session.show_lines = true; + return 0; +} #endif static const char * const probe_usage[] = { From 231e36f4d2e63dd770db80b9f5113310c2bcfcfd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Jan 2010 00:12:12 -0500 Subject: [PATCH 079/640] tracing/kprobe: Update kprobe tracing self test for new syntax Update kprobe tracing self test for new syntax (it supports deleting individual probes, and drops $argN support) and behavior change (new probes are disabled in default). This selftest includes the following checks: - Adding function-entry probe and return probe with arguments. - Enabling these probes. 
- Deleting it individually. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20100114051211.7814.29436.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 55 +++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ac728ded964..d6266cad6953 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1507,28 +1507,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3, static __init int kprobe_trace_self_tests_init(void) { - int ret; + int ret, warn = 0; int (*target)(int, int, int, int, int, int); + struct trace_probe *tp; target = kprobe_trace_selftest_target; pr_info("Testing kprobe tracing: "); ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " - "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); - if (WARN_ON_ONCE(ret)) - pr_warning("error enabling function entry\n"); + "$stack $stack0 +0($stack)"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on probing function entry.\n"); + warn++; + } else { + /* Enable trace point */ + tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tp == NULL)) { + pr_warning("error on getting new probe.\n"); + warn++; + } else + probe_event_enable(&tp->call); + } ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " "$retval"); - if (WARN_ON_ONCE(ret)) - pr_warning("error enabling function return\n"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on probing function return.\n"); + warn++; + } else { + /* Enable trace point */ + tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tp == NULL)) { + pr_warning("error on getting new probe.\n"); + warn++; + } else + probe_event_enable(&tp->call); + } + + if (warn) + goto end; ret = target(1, 2, 3, 4, 5, 6); - cleanup_all_probes(); + ret = 
command_trace_probe("-:testprobe"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on deleting a probe.\n"); + warn++; + } - pr_cont("OK\n"); + ret = command_trace_probe("-:testprobe2"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on deleting a probe.\n"); + warn++; + } + +end: + cleanup_all_probes(); + if (warn) + pr_cont("NG: Some tests are failed. Please check them.\n"); + else + pr_cont("OK\n"); return 0; } From 42cce92f4ddfa41e2dfe26fdcad4887943c032f2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 17 Jan 2010 10:36:08 +0100 Subject: [PATCH 080/640] perf: Make __perf_event_sched_out static __perf_event_sched_out doesn't need to be globally available, make it static. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index eae6ff693604..c4e90b8cd60d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1049,8 +1049,8 @@ static int perf_event_refresh(struct perf_event *event, int refresh) return 0; } -void __perf_event_sched_out(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx) +static void __perf_event_sched_out(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx) { struct perf_event *event; From 5b0311e1f2464547fc6f17a82d7ea2538c8c7a70 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 17 Jan 2010 11:59:13 +0100 Subject: [PATCH 081/640] perf: Allow pinned and flexible groups to be scheduled separately Tune the scheduling helpers so that we can choose to schedule either pinned and/or flexible groups from a context. And while at it, refactor a bit the naming of these helpers to make these more consistent and flexible. There is no (intended) change in scheduling behaviour in this patch. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- kernel/perf_event.c | 137 ++++++++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 44 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c4e90b8cd60d..bfc4ee015c87 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1049,8 +1049,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh) return 0; } -static void __perf_event_sched_out(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx) +enum event_type_t { + EVENT_FLEXIBLE = 0x1, + EVENT_PINNED = 0x2, + EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, +}; + +static void ctx_sched_out(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + enum event_type_t event_type) { struct perf_event *event; @@ -1061,13 +1068,18 @@ static void __perf_event_sched_out(struct perf_event_context *ctx, update_context_time(ctx); perf_disable(); - if (ctx->nr_active) { + if (!ctx->nr_active) + goto out_enable; + + if (event_type & EVENT_PINNED) list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); + if (event_type & EVENT_FLEXIBLE) list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); - } + + out_enable: perf_enable(); out: raw_spin_unlock(&ctx->lock); @@ -1229,15 +1241,13 @@ void perf_event_task_sched_out(struct task_struct *task, rcu_read_unlock(); if (do_switch) { - __perf_event_sched_out(ctx, cpuctx); + ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; } } -/* - * Called with IRQs disabled - */ -static void __perf_event_task_sched_out(struct perf_event_context *ctx) +static void task_ctx_sched_out(struct perf_event_context *ctx, + enum event_type_t event_type) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); @@ -1247,39 +1257,34 @@ static void __perf_event_task_sched_out(struct perf_event_context 
*ctx) if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - __perf_event_sched_out(ctx, cpuctx); + ctx_sched_out(ctx, cpuctx, event_type); cpuctx->task_ctx = NULL; } /* * Called with IRQs disabled */ -static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) +static void __perf_event_task_sched_out(struct perf_event_context *ctx) { - __perf_event_sched_out(&cpuctx->ctx, cpuctx); + task_ctx_sched_out(ctx, EVENT_ALL); +} + +/* + * Called with IRQs disabled + */ +static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, + enum event_type_t event_type) +{ + ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); } static void -__perf_event_sched_in(struct perf_event_context *ctx, - struct perf_cpu_context *cpuctx) +ctx_pinned_sched_in(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + int cpu) { - int cpu = smp_processor_id(); struct perf_event *event; - int can_add_hw = 1; - raw_spin_lock(&ctx->lock); - ctx->is_active = 1; - if (likely(!ctx->nr_events)) - goto out; - - ctx->timestamp = perf_clock(); - - perf_disable(); - - /* - * First go through the list and put on any pinned groups - * in order to give them the best chance of going on. 
- */ list_for_each_entry(event, &ctx->pinned_groups, group_entry) { if (event->state <= PERF_EVENT_STATE_OFF) continue; @@ -1298,6 +1303,15 @@ __perf_event_sched_in(struct perf_event_context *ctx, event->state = PERF_EVENT_STATE_ERROR; } } +} + +static void +ctx_flexible_sched_in(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + int cpu) +{ + struct perf_event *event; + int can_add_hw = 1; list_for_each_entry(event, &ctx->flexible_groups, group_entry) { /* Ignore events in OFF or ERROR state */ @@ -1314,11 +1328,53 @@ __perf_event_sched_in(struct perf_event_context *ctx, if (group_sched_in(event, cpuctx, ctx, cpu)) can_add_hw = 0; } +} + +static void +ctx_sched_in(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + enum event_type_t event_type) +{ + int cpu = smp_processor_id(); + + raw_spin_lock(&ctx->lock); + ctx->is_active = 1; + if (likely(!ctx->nr_events)) + goto out; + + ctx->timestamp = perf_clock(); + + perf_disable(); + + /* + * First go through the list and put on any pinned groups + * in order to give them the best chance of going on. + */ + if (event_type & EVENT_PINNED) + ctx_pinned_sched_in(ctx, cpuctx, cpu); + + /* Then walk through the lower prio flexible groups */ + if (event_type & EVENT_FLEXIBLE) + ctx_flexible_sched_in(ctx, cpuctx, cpu); + perf_enable(); out: raw_spin_unlock(&ctx->lock); } +static void task_ctx_sched_in(struct task_struct *task, + enum event_type_t event_type) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event_context *ctx = task->perf_event_ctxp; + + if (likely(!ctx)) + return; + if (cpuctx->task_ctx == ctx) + return; + ctx_sched_in(ctx, cpuctx, event_type); + cpuctx->task_ctx = ctx; +} /* * Called from scheduler to add the events of the current task * with interrupts disabled. 
@@ -1332,22 +1388,15 @@ __perf_event_sched_in(struct perf_event_context *ctx, */ void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = task->perf_event_ctxp; - - if (likely(!ctx)) - return; - if (cpuctx->task_ctx == ctx) - return; - __perf_event_sched_in(ctx, cpuctx); - cpuctx->task_ctx = ctx; + task_ctx_sched_in(task, EVENT_ALL); } -static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx) +static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, + enum event_type_t event_type) { struct perf_event_context *ctx = &cpuctx->ctx; - __perf_event_sched_in(ctx, cpuctx); + ctx_sched_in(ctx, cpuctx, event_type); } #define MAX_INTERRUPTS (~0ULL) @@ -1476,17 +1525,17 @@ void perf_event_task_tick(struct task_struct *curr) if (ctx) perf_ctx_adjust_freq(ctx); - perf_event_cpu_sched_out(cpuctx); + cpu_ctx_sched_out(cpuctx, EVENT_ALL); if (ctx) - __perf_event_task_sched_out(ctx); + task_ctx_sched_out(ctx, EVENT_ALL); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - perf_event_cpu_sched_in(cpuctx); + cpu_ctx_sched_in(cpuctx, EVENT_ALL); if (ctx) - perf_event_task_sched_in(curr); + task_ctx_sched_in(curr, EVENT_ALL); } static int event_enable_on_exec(struct perf_event *event, From 7defb0f879bbcfe29e3c6f29d685d4f29b7a0700 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 17 Jan 2010 12:15:31 +0100 Subject: [PATCH 082/640] perf: Don't schedule out/in pinned events on task tick We don't need to schedule in/out pinned events on task tick, now that pinned and flexible groups can be scheduled separately. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- kernel/perf_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index bfc4ee015c87..a90ae694cbc1 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1525,17 +1525,17 @@ void perf_event_task_tick(struct task_struct *curr) if (ctx) perf_ctx_adjust_freq(ctx); - cpu_ctx_sched_out(cpuctx, EVENT_ALL); + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_out(ctx, EVENT_ALL); + task_ctx_sched_out(ctx, EVENT_FLEXIBLE); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - cpu_ctx_sched_in(cpuctx, EVENT_ALL); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(curr, EVENT_ALL); + task_ctx_sched_in(curr, EVENT_FLEXIBLE); } static int event_enable_on_exec(struct perf_event *event, From 329c0e012b99fa2325a0be205c052e4aba690f16 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 17 Jan 2010 12:56:05 +0100 Subject: [PATCH 083/640] perf: Better order flexible and pinned scheduling When a task gets scheduled in, we don't touch the cpu bound events, so the priority order becomes: cpu pinned, cpu flexible, task pinned, task flexible. So schedule out cpu flexibles when a new task context gets in and correctly order the groups to schedule in: task pinned, cpu flexible, task flexible. Cpu pinned groups don't need to be touched at this time. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- kernel/perf_event.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a90ae694cbc1..edc46b92b508 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1362,6 +1362,14 @@ ctx_sched_in(struct perf_event_context *ctx, raw_spin_unlock(&ctx->lock); } +static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, + enum event_type_t event_type) +{ + struct perf_event_context *ctx = &cpuctx->ctx; + + ctx_sched_in(ctx, cpuctx, event_type); +} + static void task_ctx_sched_in(struct task_struct *task, enum event_type_t event_type) { @@ -1388,15 +1396,27 @@ static void task_ctx_sched_in(struct task_struct *task, */ void perf_event_task_sched_in(struct task_struct *task) { - task_ctx_sched_in(task, EVENT_ALL); -} + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_event_context *ctx = task->perf_event_ctxp; -static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type) -{ - struct perf_event_context *ctx = &cpuctx->ctx; + if (likely(!ctx)) + return; - ctx_sched_in(ctx, cpuctx, event_type); + if (cpuctx->task_ctx == ctx) + return; + + /* + * We want to keep the following priority order: + * cpu pinned (that don't need to move), task pinned, + * cpu flexible, task flexible. 
+ */ + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); + + ctx_sched_in(ctx, cpuctx, EVENT_PINNED); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); + + cpuctx->task_ctx = ctx; } #define MAX_INTERRUPTS (~0ULL) From 580d9e00fdfb85e65c5097dcd739c6efcdbadc96 Mon Sep 17 00:00:00 2001 From: Motohiro KOSAKI Date: Mon, 18 Jan 2010 21:35:05 -0500 Subject: [PATCH 084/640] kprobetrace, doc: Shell needs single quote to use $ character Shell interprets $val as shell variable, thus we need quote if we use the echo command. Signed-off-by: KOSAKI Motohiro Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100119023505.31880.17367.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index f30978e001f8..ab57f02e53bb 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -79,7 +79,7 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events + echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording 1st to 4th arguments as "myprobe" event. Note, which register/stack entry is @@ -88,7 +88,7 @@ the ABI, please try to use probe subcommand of perf-tools (you can find it under tools/perf/). As this example shows, users can choose more familiar names for each arguments. 
- echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events + echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events This sets a kretprobe on the return point of do_sys_open() function with recording return value as "myretprobe" event. From df3ab708b787a2b35de5101452bd51d4a8ae0ded Mon Sep 17 00:00:00 2001 From: Motohiro KOSAKI Date: Mon, 18 Jan 2010 21:35:12 -0500 Subject: [PATCH 085/640] kprobetrace, doc: Add the explanation to remove probe points Latest kprobetrace can remove probe points selectively, thus the documentation should be updated too. Signed-off-by: KOSAKI Motohiro Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100119023512.31880.35535.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index ab57f02e53bb..a9100b28eb84 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -24,6 +24,7 @@ Synopsis of kprobe_events ------------------------- p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + -:[GRP/]EVENT : Clear a probe GRP : Group name. If omitted, use "kprobes" for it. EVENT : Event name. If omitted, the event name is generated @@ -122,6 +123,12 @@ REC->dfd, REC->filename, REC->flags, REC->mode This clears all probe points. + Or, + + echo -:myprobe >> kprobe_events + + This clears probe points selectively. + Right after definition, each event is disabled by default. For tracing these events, you need to enable it. 
From e8d433f335d44028d41af231ef5c52fd8a9b280b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Jan 2010 15:59:19 -0200 Subject: [PATCH 086/640] perf archive: Add documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also makes it appear on the 'perf --help' output, i.e. util/generate-cmdlist.sh now takes it into account. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263837559-24168-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-archive.txt | 22 ++++++++++++++++++++++ tools/perf/command-list.txt | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-archive.txt diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt new file mode 100644 index 000000000000..fae174dc7d01 --- /dev/null +++ b/tools/perf/Documentation/perf-archive.txt @@ -0,0 +1,22 @@ +perf-archive(1) +=============== + +NAME +---- +perf-archive - Create archive with object files with build-ids found in perf.data file + +SYNOPSIS +-------- +[verse] +'perf archive' [file] + +DESCRIPTION +----------- +This command runs perf-buildid-list --with-hits, and collects the files +with the buildids found so that analysis of perf.data contents can be possible +on another machine. 
+ + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1] diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index f73d1d90f5bd..cf6444dfd73a 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -3,7 +3,7 @@ # command name category [deprecated] [common] # perf-annotate mainporcelain common -perf-archive mainporcelain +perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-list mainporcelain common perf-diff mainporcelain common From d5526d8cb8e5aa3349c1ff4e409ad9b4cdac380c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Jan 2010 18:21:42 -0200 Subject: [PATCH 087/640] perf archive: Fix installation steps in the Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these warning: acme@parisc:~/git/linux-2.6-tip$ make -C tools/perf/ install make: Entering directory `/home/acme/git/linux-2.6-tip/tools/perf' Makefile:833: warning: overriding commands for target `perf-archive' Makefile:822: warning: ignoring old commands for target `perf-archive' Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263846102-24841-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d739552036d0..ddbeeee9ade2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -311,7 +311,6 @@ PROGRAMS += perf # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. 
# -BUILT_INS += perf-archive # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) @@ -1004,6 +1003,7 @@ install: all $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' From f162f87ad6e98e8bfb2362955da46bed7b2514be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Jan 2010 10:36:13 -0200 Subject: [PATCH 088/640] perf symbols: Set dso->kernel when handling the fake vmlinux MMAP event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because it may be possible that there was no buildid section, where we would set this to 1. Found while analysing a perf.data file collected on an ARM machine where an explicitly specified vmlinux was being disregarded. 
Reported-by: Jamie Iles Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263904574-30732-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dc13cad828d7..bbaee61c1683 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -373,6 +373,7 @@ int event__process_mmap(event_t *self, struct perf_session *session) if (kernel == NULL) goto out_problem; + kernel->kernel = 1; if (__map_groups__create_kernel_maps(&session->kmaps, session->vmlinux_maps, kernel) < 0) From dc8d6ab2b61a2d92b5d7438565ccd20b29724cb2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Jan 2010 10:36:14 -0200 Subject: [PATCH 089/640] perf symbols: Use only --vmlinux if specified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found while analysing a perf.data file collected on an ARM machine where an explicitly specified vmlinux was being disregarded. 
Reported-by: Jamie Iles Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1263904574-30732-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 75 ++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a4e745934584..b6ab23dd5f9f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1572,7 +1572,7 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return -1; dso__set_loaded(self, map->type); - err = dso__load_sym(self, map, session, self->long_name, fd, filter, 1, 0); + err = dso__load_sym(self, map, session, vmlinux, fd, filter, 1, 0); close(fd); return err; @@ -1584,6 +1584,26 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, int err; const char *kallsyms_filename = NULL; char *kallsyms_allocated_filename = NULL; + /* + * Step 1: if the user specified a vmlinux filename, use it and only + * it, reporting errors to the user if it cannot be used. + * + * For instance, try to analyse an ARM perf.data file _without_ a + * build-id, or if the user specifies the wrong path to the right + * vmlinux file, obviously we can't fallback to another vmlinux (a + * x86_86 one, on the machine where analysis is being performed, say), + * or worse, /proc/kallsyms. + * + * If the specified file _has_ a build-id and there is a build-id + * section in the perf.data file, we will still do the expected + * validation in dso__load_vmlinux and will bail out if they don't + * match. 
+ */ + if (symbol_conf.vmlinux_name != NULL) { + err = dso__load_vmlinux(self, map, session, + symbol_conf.vmlinux_name, filter); + goto out_try_fixup; + } if (vmlinux_path != NULL) { int i; @@ -1618,46 +1638,41 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, goto do_kallsyms; } } - + /* + * Now look if we have it on the build-id cache in + * $HOME/.debug/[kernel.kallsyms]. + */ build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); if (asprintf(&kallsyms_allocated_filename, "%s/.debug/[kernel.kallsyms]/%s", - getenv("HOME"), sbuild_id) != -1) { - if (access(kallsyms_filename, F_OK)) { - kallsyms_filename = kallsyms_allocated_filename; - goto do_kallsyms; - } - free(kallsyms_allocated_filename); - kallsyms_allocated_filename = NULL; - } - - goto do_vmlinux; - } - - if (self->long_name[0] == '[') { - kallsyms_filename = "/proc/kallsyms"; - goto do_kallsyms; - } - -do_vmlinux: - err = dso__load_vmlinux(self, map, session, self->long_name, filter); - if (err <= 0) { - if (self->has_build_id) + getenv("HOME"), sbuild_id) == -1) return -1; - pr_info("The file %s cannot be used, " - "trying to use /proc/kallsyms...", self->long_name); -do_kallsyms: - err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter); - if (err > 0 && kallsyms_filename == NULL) - dso__set_long_name(self, strdup("[kernel.kallsyms]")); - free(kallsyms_allocated_filename); + if (access(kallsyms_filename, F_OK)) { + free(kallsyms_allocated_filename); + return -1; + } + + kallsyms_filename = kallsyms_allocated_filename; + } else { + /* + * Last resort, if we don't have a build-id and couldn't find + * any vmlinux file, try the running kernel kallsyms table. 
+ */ + kallsyms_filename = "/proc/kallsyms"; } +do_kallsyms: + err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter); + free(kallsyms_allocated_filename); + +out_try_fixup: if (err > 0) { out_fixup: + if (kallsyms_filename == NULL) + dso__set_long_name(self, strdup("[kernel.kallsyms]")); map__fixup_start(map); map__fixup_end(map); } From ef12a141306c90336a3a10d40213ecd98624d274 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Jan 2010 15:28:45 -0200 Subject: [PATCH 090/640] perf buildid-cache: Add new command to manage build-id cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now it just has operations to examine a given file, find its build-id and add or remove it to/from the cache. Useful, for instance, when adding binaries sent together with a perf.data file, so that we can add them to the cache and have the tools find it when resolving symbols. It'll also manage the size of the cache like 'ccache' does. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264008525-29025-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- .../perf/Documentation/perf-buildid-cache.txt | 33 +++++ tools/perf/Makefile | 1 + tools/perf/builtin-buildid-cache.c | 133 ++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/command-list.txt | 1 + tools/perf/perf.c | 1 + tools/perf/util/header.c | 72 ++++++++-- tools/perf/util/header.h | 5 + tools/perf/util/symbol.c | 4 +- tools/perf/util/symbol.h | 2 +- 10 files changed, 241 insertions(+), 12 deletions(-) create mode 100644 tools/perf/Documentation/perf-buildid-cache.txt create mode 100644 tools/perf/builtin-buildid-cache.c diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt new file mode 100644 index 000000000000..88bc3b519746 --- /dev/null +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -0,0 +1,33 @@ +perf-buildid-cache(1) +===================== + +NAME +---- +perf-buildid-cache - Manage build-id cache. + +SYNOPSIS +-------- +[verse] +'perf buildid-list ' + +DESCRIPTION +----------- +This command manages the build-id cache. It can add and remove files to the +cache. In the future it should as well purge older entries, set upper limits +for the space used by the cache, etc. + +OPTIONS +------- +-a:: +--add=:: + Add specified file to the cache. +-r:: +--remove=:: + Remove specified file to the cache. +-v:: +--verbose:: + Be more verbose. 
+ +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ddbeeee9ade2..9b173e66fb41 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -445,6 +445,7 @@ BUILTIN_OBJS += builtin-diff.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o BUILTIN_OBJS += builtin-buildid-list.o +BUILTIN_OBJS += builtin-buildid-cache.o BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c new file mode 100644 index 000000000000..30a05f552c96 --- /dev/null +++ b/tools/perf/builtin-buildid-cache.c @@ -0,0 +1,133 @@ +/* + * builtin-buildid-cache.c + * + * Builtin buildid-cache command: Manages build-id cache + * + * Copyright (C) 2010, Red Hat Inc. + * Copyright (C) 2010, Arnaldo Carvalho de Melo + */ +#include "builtin.h" +#include "perf.h" +#include "util/cache.h" +#include "util/debug.h" +#include "util/header.h" +#include "util/parse-options.h" +#include "util/strlist.h" +#include "util/symbol.h" + +static char const *add_name_list_str, *remove_name_list_str; + +static const char * const buildid_cache_usage[] = { + "perf buildid-cache []", + NULL +}; + +static const struct option buildid_cache_options[] = { + OPT_STRING('a', "add", &add_name_list_str, + "file list", "file(s) to add"), + OPT_STRING('r', "remove", &remove_name_list_str, "file list", + "file(s) to remove"), + OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose"), + OPT_END() +}; + +static int build_id_cache__add_file(const char *filename, const char *debugdir) +{ + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + u8 build_id[BUILD_ID_SIZE]; + int err; + + if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + pr_debug("Couldn't read a build-id in %s\n", filename); + return -1; + } + + build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + err = build_id_cache__add_s(sbuild_id, 
debugdir, filename, false); + if (verbose) + pr_info("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + return err; +} + +static int build_id_cache__remove_file(const char *filename __used, + const char *debugdir __used) +{ + u8 build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + int err; + + if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + pr_debug("Couldn't read a build-id in %s\n", filename); + return -1; + } + + build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + err = build_id_cache__remove_s(sbuild_id, debugdir); + if (verbose) + pr_info("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + +static int __cmd_buildid_cache(void) +{ + struct strlist *list; + struct str_node *pos; + char debugdir[PATH_MAX]; + + snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), + DEBUG_CACHE_DIR); + + if (add_name_list_str) { + list = strlist__new(true, add_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__add_file(pos->s, debugdir)) { + if (errno == EEXIST) { + pr_debug("%s already in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't add %s: %s\n", + pos->s, strerror(errno)); + } + + strlist__delete(list); + } + } + + if (remove_name_list_str) { + list = strlist__new(true, remove_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__remove_file(pos->s, debugdir)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror(errno)); + } + + strlist__delete(list); + } + } + + return 0; +} + +int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used) +{ + argc = parse_options(argc, argv, buildid_cache_options, + buildid_cache_usage, 0); + + if (symbol__init() < 0) + return -1; + + setup_pager(); + return __cmd_buildid_cache(); +} diff --git a/tools/perf/builtin.h 
b/tools/perf/builtin.h index 18035b1f16c7..dee97cfe3794 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -16,6 +16,7 @@ extern int check_pager_config(const char *cmd); extern int cmd_annotate(int argc, const char **argv, const char *prefix); extern int cmd_bench(int argc, const char **argv, const char *prefix); +extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); extern int cmd_diff(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index cf6444dfd73a..9afcff2e3ae5 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -5,6 +5,7 @@ perf-annotate mainporcelain common perf-archive mainporcelain common perf-bench mainporcelain common +perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-diff mainporcelain common perf-list mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index fc89005c3e51..05c861c045d5 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -285,6 +285,7 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { + { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "diff", cmd_diff, 0 }, { "help", cmd_help, 0 }, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1b65fed0dd2d..2bb2bdb1f456 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -231,32 +231,29 @@ static int dsos__write_buildid_table(int fd) return err; } -static int dso__cache_build_id(struct dso *self, const char *debugdir) +int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, + const char *name, bool is_kallsyms) { const size_t size = PATH_MAX; char *filename = malloc(size), - *linkname = 
malloc(size), *targetname, *sbuild_id; + *linkname = malloc(size), *targetname; int len, err = -1; - bool is_kallsyms = self->kernel && self->long_name[0] != '/'; if (filename == NULL || linkname == NULL) goto out_free; len = snprintf(filename, size, "%s%s%s", - debugdir, is_kallsyms ? "/" : "", self->long_name); + debugdir, is_kallsyms ? "/" : "", name); if (mkdir_p(filename, 0755)) goto out_free; - len += snprintf(filename + len, sizeof(filename) - len, "/"); - sbuild_id = filename + len; - build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); + snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id); if (access(filename, F_OK)) { if (is_kallsyms) { if (copyfile("/proc/kallsyms", filename)) goto out_free; - } else if (link(self->long_name, filename) && - copyfile(self->long_name, filename)) + } else if (link(name, filename) && copyfile(name, filename)) goto out_free; } @@ -278,6 +275,63 @@ out_free: return err; } +static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, + const char *name, const char *debugdir, + bool is_kallsyms) +{ + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(build_id, build_id_size, sbuild_id); + + return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms); +} + +int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +{ + const size_t size = PATH_MAX; + char *filename = malloc(size), + *linkname = malloc(size); + int err = -1; + + if (filename == NULL || linkname == NULL) + goto out_free; + + snprintf(linkname, size, "%s/.build-id/%.2s/%s", + debugdir, sbuild_id, sbuild_id + 2); + + if (access(linkname, F_OK)) + goto out_free; + + if (readlink(linkname, filename, size) < 0) + goto out_free; + + if (unlink(linkname)) + goto out_free; + + /* + * Since the link is relative, we must make it absolute: + */ + snprintf(linkname, size, "%s/.build-id/%.2s/%s", + debugdir, sbuild_id, filename); + + if (unlink(linkname)) + goto out_free; + + err = 0; +out_free: + 
free(filename); + free(linkname); + return err; +} + +static int dso__cache_build_id(struct dso *self, const char *debugdir) +{ + bool is_kallsyms = self->kernel && self->long_name[0] != '/'; + + return build_id_cache__add_b(self->build_id, sizeof(self->build_id), + self->long_name, debugdir, is_kallsyms); +} + static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) { struct dso *pos; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ccc8540feccd..82a6af72d4cc 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -5,6 +5,7 @@ #include #include #include "types.h" +#include "event.h" #include @@ -84,4 +85,8 @@ int perf_header__process_sections(struct perf_header *self, int fd, struct perf_header *ph, int feat, int fd)); +int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, + const char *name, bool is_kallsyms); +int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); + #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b6ab23dd5f9f..6f30fe18c265 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -345,10 +345,10 @@ void dso__sort_by_name(struct dso *self, enum map_type type) &self->symbols[type]); } -int build_id__sprintf(u8 *self, int len, char *bf) +int build_id__sprintf(const u8 *self, int len, char *bf) { char *bid = bf; - u8 *raw = self; + const u8 *raw = self; int i; for (i = 0; i < len; ++i) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 525085fd0735..ffe0b0f2e5d3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -144,7 +144,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); -int build_id__sprintf(u8 *self, int len, char *bf); +int 
build_id__sprintf(const u8 *self, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)); From 71318da9d2a46b9986dcecb44b7ae978753ca4dd Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 21 Jan 2010 12:12:52 +0300 Subject: [PATCH 091/640] ARM: MX3: Fixed typo in declared enum type name. To distinguish between mx31lite and mx31lilly boards better to use different enum types. Signed-off-by: Vladimir Zapolskiy Acked-by: Daniel Mack Signed-off-by: Sascha Hauer --- arch/arm/plat-mxc/include/mach/board-mx31lite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-mxc/include/mach/board-mx31lite.h b/arch/arm/plat-mxc/include/mach/board-mx31lite.h index 0184b638c268..2b2da0367578 100644 --- a/arch/arm/plat-mxc/include/mach/board-mx31lite.h +++ b/arch/arm/plat-mxc/include/mach/board-mx31lite.h @@ -25,7 +25,7 @@ #ifndef __ASSEMBLY__ -enum mx31lilly_boards { +enum mx31lite_boards { MX31LITE_NOBOARD = 0, MX31LITE_DB = 1, }; From 0406ad336c066190770cbf350b552d608e43ed09 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 20 Jan 2010 00:06:30 -0700 Subject: [PATCH 092/640] ACPI: processor: add kernel command line support for early _PDC eval Allow platforms not listed in DMI table to opt-in and evaluate _PDC early. Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 4 ++++ drivers/acpi/processor_pdc.c | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 736d45602886..826b6e148316 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -199,6 +199,10 @@ and is between 256 and 4096 characters. It is defined in the file acpi_display_output=video See above. + acpi_early_pdc_eval [HW,ACPI] Evaluate processor _PDC methods + early. Needed on some platforms to properly + initialize the EC. 
+ acpi_irq_balance [HW,ACPI] ACPI will balance active IRQs default in APIC mode diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 7247819dbd80..3bbafe9576ae 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -151,6 +151,13 @@ static int set_early_pdc_optin(const struct dmi_system_id *id) return 0; } +static int param_early_pdc_optin(char *s) +{ + early_pdc_optin = 1; + return 1; +} +__setup("acpi_early_pdc_eval", param_early_pdc_optin); + static struct dmi_system_id __cpuinitdata early_pdc_optin_table[] = { { set_early_pdc_optin, "HP Envy", { From a4932299d03a1c20e58e4cc40a66fb0a048fb3a7 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 20 Jan 2010 00:06:35 -0700 Subject: [PATCH 093/640] ACPI: processor: only evaluate _PDC once per processor If we evaluate _PDC in the early path, we do not want to evaluate it again when the processor driver is loaded. Cc: Venkatesh Pallipadi Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/processor_pdc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 3bbafe9576ae..e306ba9aa34e 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -125,6 +125,8 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in) return status; } +static int early_pdc_done; + void acpi_processor_set_pdc(acpi_handle handle) { struct acpi_object_list *obj_list; @@ -132,6 +134,9 @@ void acpi_processor_set_pdc(acpi_handle handle) if (arch_has_acpi_pdc() == false) return; + if (early_pdc_done) + return; + obj_list = acpi_processor_alloc_pdc(); if (!obj_list) return; @@ -199,4 +204,6 @@ void __init acpi_early_processor_set_pdc(void) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, early_init_pdc, NULL, NULL, NULL); + + early_pdc_done = 1; } From eceb784cec4dc0fcc2993d9ee4a7c0d111ada80a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: 
Mon, 25 Jan 2010 10:35:16 +0800 Subject: [PATCH 094/640] drm/i915: disable hotplug detect before Ironlake CRT detect This tries to fix CRT detect loop hang seen on some Ironlake form factor, to clear up hotplug detect state before taking CRT detect to make sure next hotplug detect cycle is consistent. Cc: Stable Team Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_crt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index ddefc871edfe..79dd4026586f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -157,6 +157,9 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector) adpa = I915_READ(PCH_ADPA); adpa &= ~ADPA_CRT_HOTPLUG_MASK; + /* disable HPD first */ + I915_WRITE(PCH_ADPA, adpa); + (void)I915_READ(PCH_ADPA); adpa |= (ADPA_CRT_HOTPLUG_PERIOD_128 | ADPA_CRT_HOTPLUG_WARMUP_10MS | From 0ce907f89118aa8748f950700b6919b1d8d8a038 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 23 Jan 2010 20:26:35 +0000 Subject: [PATCH 095/640] drm/i915: Prevent use of uninitialized pointers along error path. X.org hang with [drm:i915_gem_do_execbuffer] *ERROR* in dmesg http://bugzilla.kernel.org/show_bug.cgi?id=15114 Matej found he was hitting an error path within i915_gem_do_execbuffer() that led to the attempt to dereference an uninitialised pointer during cleanup. This path used to be safe as we used to calloc the object lists, but this was changed in c8e0f93. Daniel Vetter had also spotted this error and proposed a similar patch. 
[ 6379.732892] [drm:i915_gem_do_execbuffer] *ERROR* Object ffff880098cd6540 appears more than once in object list [ 6379.740976] [drm:i915_gem_do_execbuffer] *ERROR* Object ffff880098cd6540 appears more than once in object list [ 6379.740995] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 [ 6379.740998] IP: [] i915_gem_do_execbuffer+0xba5/0x1260 [ 6379.741006] PGD babab067 PUD bb435067 PMD 0 [ 6379.741010] Oops: 0002 [#1] PREEMPT SMP [ 6379.741014] last sysfs file: /sys/devices/pci0000:00/0000:00:1c.2/0000:06:00.0/ieee80211/phy0/rfkill0/state [ 6379.741017] CPU 1 [ 6379.741021] Pid: 2186, comm: X Not tainted 2.6.33-rc4-00399-g24bc734 #142 M11D/ESPRIMO Mobile M9400 [ 6379.741023] RIP: 0010:[] [] i915_gem_do_execbuffer+0xba5/0x1260 [ 6379.741027] RSP: 0018:ffff8800b9047b78 EFLAGS: 00213206 [ 6379.741029] RAX: 0000000000000000 RBX: 000000000000004f RCX: ffff880098cac800 [ 6379.741032] RDX: ffff880098caca78 RSI: ffff8800b9047c98 RDI: ffff880098cd6540 [ 6379.741034] RBP: ffff8800b9047c78 R08: ffffffff814b96b5 R09: 0000000000000006 [ 6379.741036] R10: 0000000000000000 R11: 0000000000000003 R12: 000000000000004e [ 6379.741038] R13: 00000000fffffff7 R14: 0000000000000000 R15: 0000000000000001 [ 6379.741041] FS: 0000000000000000(0000) GS:ffff880001900000(0063) knlGS:00000000f72636c0 [ 6379.741043] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 6379.741041] FS: 0000000000000000(0000) GS:ffff880001900000(0063) knlGS:00000000f72636c0 [ 6379.741043] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 6379.741045] CR2: 00000000000000a0 CR3: 00000000b9000000 CR4: 00000000000006e0 [ 6379.741048] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6379.741050] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 6379.741052] Process X (pid: 2186, threadinfo ffff8800b9046000, task ffff8800bb5d8000) [ 6379.741054] Stack: [ 6379.741055] ffffc90023f57000 ffffc90023f56fff ffffc90023f56fff ffffc90023f55000 [ 6379.741059] <0> 
ffff8800b9047c98 ffff8800bb43c840 ffff8800bf1de800 ffff8800bf1de820 [ 6379.741063] <0> ffff8800b9047bd8 ffff880098cac800 0000000000000000 0000000000000002 [ 6379.741068] Call Trace: [ 6379.741072] [] ? i915_gem_execbuffer+0x6b/0x370 [ 6379.741077] [] ? __vmalloc_node+0xa2/0xb0 [ 6379.741080] [] ? i915_gem_execbuffer+0x6b/0x370 [ 6379.741083] [] i915_gem_execbuffer+0x1b6/0x370 [ 6379.741086] [] drm_ioctl+0x1d5/0x460 [ 6379.741089] [] ? i915_gem_execbuffer+0x0/0x370 [ 6379.741093] [] i915_compat_ioctl+0x45/0x50 [ 6379.741097] [] compat_sys_ioctl+0xa9/0x1570 [ 6379.741102] [] ? vfs_read+0x13c/0x1a0 [ 6379.741106] [] sysenter_dispatch+0x7/0x2b [ 6379.741108] Code: 08 85 c0 74 52 31 db 0f 1f 80 00 00 00 00 48 63 c3 48 8b 8d 68 ff ff ff 48 8d 14 c1 48 8b 02 48 85 c0 74 25 48 8b 80 80 00 00 00 80 a0 00 00 00 00 00 00 00 48 8b 3a 48 85 ff 74 0c 48 c7 c6 [ 6379.741142] RIP [] i915_gem_do_execbuffer+0xba5/0x1260 [ 6379.741145] RSP [ 6379.741147] CR2: 00000000000000a0 [ 6379.741159] ---[ end trace 0598809afa4c31db ]--- Reported-by: Matej Laitl Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0c67924ca80c..1ef7ec4f38fe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3742,6 +3742,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (object_list[i] == NULL) { DRM_ERROR("Invalid object handle %d at index %d\n", exec_list[i].handle, i); + /* prevent error path from reading uninitialized data */ + args->buffer_count = i + 1; ret = -EBADF; goto err; } @@ -3750,6 +3752,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (obj_priv->in_execbuffer) { DRM_ERROR("Object %p appears more than once in object list\n", object_list[i]); + /* prevent error path from reading uninitialized data */ + args->buffer_count = i + 1; ret = -EBADF; goto err; } From 
c062df61966405b0be6743bfb0cf300dca2fa7c2 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Sat, 23 Jan 2010 00:12:58 +0800 Subject: [PATCH 096/640] drm/i915: enable vblank interrupt on ironlake so far vblank interrupt on ironlake is disabled, this would cause bad gfx performance if userspace calls drm_wait_vblank. This patch enables vblank interrupt on ironlake and follows vblank get/put model. Signed-off-by: Li Peng Acked-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 1 + 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 89a071a3e6fb..e7472d82132a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -309,6 +309,12 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev) if (de_iir & DE_GSE) ironlake_opregion_gse_intr(dev); + if (de_iir & DE_PIPEA_VBLANK) + drm_handle_vblank(dev, 0); + + if (de_iir & DE_PIPEB_VBLANK) + drm_handle_vblank(dev, 1); + /* check event from PCH */ if ((de_iir & DE_PCH_EVENT) && (pch_iir & SDE_HOTPLUG_MASK)) { @@ -844,11 +850,11 @@ int i915_enable_vblank(struct drm_device *dev, int pipe) if (!(pipeconf & PIPEACONF_ENABLE)) return -EINVAL; - if (IS_IRONLAKE(dev)) - return 0; - spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); - if (IS_I965G(dev)) + if (IS_IRONLAKE(dev)) + ironlake_enable_display_irq(dev_priv, (pipe == 0) ? 
+ DE_PIPEA_VBLANK: DE_PIPEB_VBLANK); + else if (IS_I965G(dev)) i915_enable_pipestat(dev_priv, pipe, PIPE_START_VBLANK_INTERRUPT_ENABLE); else @@ -866,13 +872,14 @@ void i915_disable_vblank(struct drm_device *dev, int pipe) drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; unsigned long irqflags; - if (IS_IRONLAKE(dev)) - return; - spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); - i915_disable_pipestat(dev_priv, pipe, - PIPE_VBLANK_INTERRUPT_ENABLE | - PIPE_START_VBLANK_INTERRUPT_ENABLE); + if (IS_IRONLAKE(dev)) + ironlake_disable_display_irq(dev_priv, (pipe == 0) ? + DE_PIPEA_VBLANK: DE_PIPEB_VBLANK); + else + i915_disable_pipestat(dev_priv, pipe, + PIPE_VBLANK_INTERRUPT_ENABLE | + PIPE_START_VBLANK_INTERRUPT_ENABLE); spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags); } @@ -1015,7 +1022,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; /* enable kind of interrupts always enabled */ - u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT; + u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT | + DE_PIPEA_VBLANK | DE_PIPEB_VBLANK; u32 render_mask = GT_USER_INTERRUPT; u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45da78ef4a92..2cc489b6629e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1638,6 +1638,7 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_OFF: DRM_DEBUG_KMS("crtc %d dpms off\n", pipe); + drm_vblank_off(dev, pipe); /* Disable display plane */ temp = I915_READ(dspcntr_reg); if ((temp & DISPLAY_PLANE_ENABLE) != 0) { From 8a6c77d645d9111f7bc01f908464301f5c3e0a05 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Thu, 21 Jan 2010 18:45:46 +0800 Subject: [PATCH 097/640] drm/i915: Fix the device 
info of Pineview Pineview doesn't have CXSR and needs a GTT-based hardware status page. It fixes an X boot hang issue on Pineview since commit cfdf1f Signed-off-by: Li Peng Acked-by: Zhao Yakui Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 46d88965852a..ecac882e1d54 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -120,7 +120,7 @@ const static struct intel_device_info intel_gm45_info = { const static struct intel_device_info intel_pineview_info = { .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .is_i9xx = 1, - .has_pipe_cxsr = 1, + .need_gfx_hws = 1, .has_hotplug = 1, }; From 1097df3ffe855eb1476496fa5394816fb197af05 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 20 Jan 2010 11:31:11 +0800 Subject: [PATCH 098/640] ocfs2: Sync max_inline_data_with_xattr from tools. In ocfs2-tools, we have added ocfs2_max_inline_data_with_xattr, so add it in the kernel's ocfs2_fs.h. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2_fs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 1a1a679e51b5..7638a38c32bc 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize) return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); } -static inline int ocfs2_max_inline_data(int blocksize) +static inline int ocfs2_max_inline_data_with_xattr(int blocksize, + struct ocfs2_dinode *di) { - return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data); + if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL)) + return blocksize - + offsetof(struct ocfs2_dinode, id2.i_data.id_data) - + di->i_xattr_inline_size; + else + return blocksize - + offsetof(struct ocfs2_dinode, id2.i_data.id_data); } static inline int ocfs2_extent_recs_per_inode(int blocksize) From e5f2cb2b1ad05473fffe6970618997b906f23873 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 22 Jan 2010 21:58:04 +0800 Subject: [PATCH 099/640] ocfs2: fix a misleading variable name a local variable "dlm_version" is used as a fs locking version. rename it fs_version. 
Signed-off-by: Wengang Wang Signed-off-by: Joel Becker --- fs/ocfs2/stack_o2cb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index e49c41050264..3038c92af493 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) u32 dlm_key; struct dlm_ctxt *dlm; struct o2dlm_private *priv; - struct dlm_protocol_version dlm_version; + struct dlm_protocol_version fs_version; BUG_ON(conn == NULL); BUG_ON(o2cb_stack.sp_proto == NULL); @@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) /* used by the dlm code to make message headers unique, each * node in this domain must agree on this. */ dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); - dlm_version.pv_major = conn->cc_version.pv_major; - dlm_version.pv_minor = conn->cc_version.pv_minor; + fs_version.pv_major = conn->cc_version.pv_major; + fs_version.pv_minor = conn->cc_version.pv_minor; - dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); + dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version); if (IS_ERR(dlm)) { rc = PTR_ERR(dlm); mlog_errno(rc); goto out_free; } - conn->cc_version.pv_major = dlm_version.pv_major; - conn->cc_version.pv_minor = dlm_version.pv_minor; + conn->cc_version.pv_major = fs_version.pv_major; + conn->cc_version.pv_minor = fs_version.pv_minor; conn->cc_lockspace = dlm; dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); From 2bd632165c1f783888bd4cbed95f2f304829159b Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:38 -0800 Subject: [PATCH 100/640] ocfs2/trivial: Remove trailing whitespaces Patch removes trailing whitespaces. 
Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/aops.c | 4 ++-- fs/ocfs2/buffer_head_io.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 6 +++--- fs/ocfs2/cluster/tcp.c | 4 ++-- fs/ocfs2/cluster/tcp_internal.h | 4 ++-- fs/ocfs2/dlm/dlmapi.h | 2 +- fs/ocfs2/dlm/dlmast.c | 2 +- fs/ocfs2/dlm/dlmconvert.c | 2 +- fs/ocfs2/dlm/dlmdomain.c | 2 +- fs/ocfs2/dlm/dlmlock.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 38 ++++++++++++++++----------------- fs/ocfs2/dlm/dlmrecovery.c | 38 ++++++++++++++++----------------- fs/ocfs2/dlm/dlmunlock.c | 8 +++---- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/export.c | 2 +- fs/ocfs2/file.c | 14 ++++++------ fs/ocfs2/inode.c | 4 ++-- fs/ocfs2/journal.c | 2 +- fs/ocfs2/super.c | 2 +- fs/ocfs2/uptodate.c | 4 ++-- 20 files changed, 72 insertions(+), 72 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3dae4a13f6e4..7e9df11260f4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -599,7 +599,7 @@ bail: return ret; } -/* +/* * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're * particularly interested in the aio/dio case. Like the core uses * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from @@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw, ret = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, - nr_segs, + nr_segs, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io); diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index d43d34a1dd31..21c808f752d8 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, } ocfs2_metadata_cache_io_unlock(ci); - mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", + mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", (unsigned long long)block, nr, ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? 
"no" : "yes", flags); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index eda5b8bcddd5..5c9890006708 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; -/* Only sets a new threshold if there are no active regions. +/* Only sets a new threshold if there are no active regions. * * No locking or otherwise interesting code is required for reading * o2hb_dead_threshold as it can't change once regions are active and @@ -170,7 +170,7 @@ static void o2hb_write_timeout(struct work_struct *work) mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " "milliseconds\n", reg->hr_dev_name, - jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); + jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); o2quo_disk_timeout(); } @@ -624,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, "seq %llu last %llu changed %u equal %u\n", slot->ds_node_num, (long long)slot->ds_last_generation, le32_to_cpu(hb_block->hb_cksum), - (unsigned long long)le64_to_cpu(hb_block->hb_seq), + (unsigned long long)le64_to_cpu(hb_block->hb_seq), (unsigned long long)slot->ds_last_time, slot->ds_changed_samples, slot->ds_equal_samples); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 334f231a422c..938ba181a3d9 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc, cond_resched(); continue; } - mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT + mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); o2net_ensure_shutdown(nn, sc, 0); break; @@ -1483,7 +1483,7 @@ static void o2net_idle_timer(unsigned long data) mlog(ML_NOTICE, "here are some times that might help debug the " "situation: (tmr %ld.%ld now %ld.%ld dr 
%ld.%ld adv " "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", - sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, + sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, now.tv_sec, (long) now.tv_usec, sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, sc->sc_tv_advance_start.tv_sec, diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 8d58cfe410b1..96fa7ebc530c 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -32,10 +32,10 @@ * on their number */ #define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS) -/* +/* * This version number represents quite a lot, unfortunately. It not * only represents the raw network message protocol on the wire but also - * locking semantics of the file system using the protocol. It should + * locking semantics of the file system using the protocol. It should * be somewhere else, I'm sure, but right now it isn't. * * With version 11, we separate out the filesystem locking portion. The diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index b5786a787fab..3cfa114aa391 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h @@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err); mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ } while (0) -#define DLM_LKSB_UNUSED1 0x01 +#define DLM_LKSB_UNUSED1 0x01 #define DLM_LKSB_PUT_LVB 0x02 #define DLM_LKSB_GET_LVB 0x04 #define DLM_LKSB_UNUSED2 0x08 diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 01cf8cc3d286..dccc439fa087 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) dlm_lock_put(lock); /* free up the reserved bast that we are cancelling. * guaranteed that this will not be the last reserved - * ast because *both* an ast and a bast were reserved + * ast because *both* an ast and a bast were reserved * to get to this point. 
the res->spinlock will not be * taken here */ dlm_lockres_release_ast(dlm, res); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index ca96bce50e18..f283bce776b4 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, /* instead of logging the same network error over * and over, sleep here and wait for the heartbeat * to notice the node is dead. times out after 5s. */ - dlm_wait_for_node_death(dlm, res->owner, + dlm_wait_for_node_death(dlm, res->owner, DLM_NODE_DEATH_WAIT_MAX); ret = DLM_RECOVERING; mlog(0, "node %u died so returning DLM_RECOVERING " diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0334000676d3..988c9055fd4e 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, } /* Once the dlm ctxt is marked as leaving then we don't want - * to be put in someone's domain map. + * to be put in someone's domain map. * Also, explicitly disallow joining at certain troublesome * times (ie. during recovery). */ if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 437698e9465f..733337772671 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, } dlm_revert_pending_lock(res, lock); dlm_lock_put(lock); - } else if (dlm_is_recovery_lock(res->lockname.name, + } else if (dlm_is_recovery_lock(res->lockname.name, res->lockname.len)) { /* special case for the $RECOVERY lock. 
* there will never be an AST delivered to put diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 03ccf9a7b1f4..a659606dcb95 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) struct dlm_master_list_entry *mle; assert_spin_locked(&dlm->spinlock); - + list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { if (node_up) dlm_mle_node_up(dlm, mle, NULL, idx); @@ -833,7 +833,7 @@ lookup: __dlm_insert_mle(dlm, mle); /* still holding the dlm spinlock, check the recovery map - * to see if there are any nodes that still need to be + * to see if there are any nodes that still need to be * considered. these will not appear in the mle nodemap * but they might own this lockres. wait on them. */ bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); @@ -883,7 +883,7 @@ redo_request: msleep(500); } continue; - } + } dlm_kick_recovery_thread(dlm); msleep(1000); @@ -939,8 +939,8 @@ wait: res->lockname.name, blocked); if (++tries > 20) { mlog(ML_ERROR, "%s:%.*s: spinning on " - "dlm_wait_for_lock_mastery, blocked=%d\n", - dlm->name, res->lockname.len, + "dlm_wait_for_lock_mastery, blocked=%d\n", + dlm->name, res->lockname.len, res->lockname.name, blocked); dlm_print_one_lock_resource(res); dlm_print_one_mle(mle); @@ -1029,7 +1029,7 @@ recheck: ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); b = (mle->type == DLM_MLE_BLOCK); if ((*blocked && !b) || (!*blocked && b)) { - mlog(0, "%s:%.*s: status change: old=%d new=%d\n", + mlog(0, "%s:%.*s: status change: old=%d new=%d\n", dlm->name, res->lockname.len, res->lockname.name, *blocked, b); *blocked = b; @@ -1602,7 +1602,7 @@ send_response: } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); - ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, + ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 
DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { mlog(ML_ERROR, "failed to dispatch assert master work\n"); @@ -1701,7 +1701,7 @@ again: if (r & DLM_ASSERT_RESPONSE_REASSERT) { mlog(0, "%.*s: node %u create mles on other " - "nodes and requests a re-assert\n", + "nodes and requests a re-assert\n", namelen, lockname, to); reassert = 1; } @@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); goto done; - } + } } } spin_unlock(&dlm->master_lock); @@ -1883,7 +1883,7 @@ ok: int extra_ref = 0; int nn = -1; int rr, err = 0; - + spin_lock(&mle->spinlock); if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) extra_ref = 1; @@ -1891,7 +1891,7 @@ ok: /* MASTER mle: if any bits set in the response map * then the calling node needs to re-assert to clear * up nodes that this node contacted */ - while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, + while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, nn+1)) < O2NM_MAX_NODES) { if (nn != dlm->node_num && nn != assert->node_idx) master_request = 1; @@ -2002,7 +2002,7 @@ kill: __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); spin_unlock(&dlm->spinlock); - *ret_data = (void *)res; + *ret_data = (void *)res; dlm_put(dlm); return -EINVAL; } @@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, item->u.am.request_from = request_from; item->u.am.flags = flags; - if (ignore_higher) - mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, + if (ignore_higher) + mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, res->lockname.name); - + spin_lock(&dlm->work_lock); list_add_tail(&item->list, &dlm->work_list); spin_unlock(&dlm->work_lock); @@ -2133,7 +2133,7 @@ put: * think that $RECOVERY is currently mastered by a dead node. If so, * we wait a short time to allow that node to get notified by its own * heartbeat stack, then check again. 
All $RECOVERY lock resources - * mastered by dead nodes are purged when the hearbeat callback is + * mastered by dead nodes are purged when the hearbeat callback is * fired, so we can know for sure that it is safe to continue once * the node returns a live node or no node. */ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, @@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, ret = -EAGAIN; } spin_unlock(&dlm->spinlock); - mlog(0, "%s: reco lock master is %u\n", dlm->name, + mlog(0, "%s: reco lock master is %u\n", dlm->name, master); break; } @@ -2602,7 +2602,7 @@ fail: mlog(0, "%s:%.*s: timed out during migration\n", dlm->name, res->lockname.len, res->lockname.name); - /* avoid hang during shutdown when migrating lockres + /* avoid hang during shutdown when migrating lockres * to a node which also goes down */ if (dlm_is_node_dead(dlm, target)) { mlog(0, "%s:%.*s: expected migration " @@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); spin_unlock(&res->spinlock); - /* target has died, so make the caller break out of the + /* target has died, so make the caller break out of the * wait_event, but caller must recheck the domain_map */ spin_lock(&dlm->spinlock); if (!test_bit(mig_target, dlm->domain_map)) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 2f9e4e19a4f2..57736d3ea7b5 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, if (lock->ml.node == dead_node) { mlog(0, "AHA! there was " "a $RECOVERY lock for dead " - "node %u (%s)!\n", + "node %u (%s)!\n", dead_node, dlm->name); list_del_init(&lock->list); dlm_lock_put(lock); @@ -1839,7 +1839,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, * the lvb. 
*/ memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); } else { - /* otherwise, the node is sending its + /* otherwise, the node is sending its * most recent valid lvb info */ BUG_ON(ml->type != LKM_EXMODE && ml->type != LKM_PRMODE); @@ -2114,7 +2114,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, assert_spin_locked(&res->spinlock); if (res->owner == dlm->node_num) - /* if this node owned the lockres, and if the dead node + /* if this node owned the lockres, and if the dead node * had an EX when he died, blank out the lvb */ search_node = dead_node; else { @@ -2152,7 +2152,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, /* this node is the lockres master: * 1) remove any stale locks for the dead node - * 2) if the dead node had an EX when he died, blank out the lvb + * 2) if the dead node had an EX when he died, blank out the lvb */ assert_spin_locked(&dlm->spinlock); assert_spin_locked(&res->spinlock); @@ -2260,7 +2260,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) } spin_unlock(&res->spinlock); continue; - } + } spin_lock(&res->spinlock); /* zero the lvb if necessary */ dlm_revalidate_lvb(dlm, res, dead_node); @@ -2411,7 +2411,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st) * this function on each node racing to become the recovery * master will not stop attempting this until either: * a) this node gets the EX (and becomes the recovery master), - * or b) dlm->reco.new_master gets set to some nodenum + * or b) dlm->reco.new_master gets set to some nodenum * != O2NM_INVALID_NODE_NUM (another node will do the reco). * so each time a recovery master is needed, the entire cluster * will sync at this point. 
if the new master dies, that will @@ -2424,7 +2424,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); -again: +again: memset(&lksb, 0, sizeof(lksb)); ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, @@ -2437,8 +2437,8 @@ again: if (ret == DLM_NORMAL) { mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", dlm->name, dlm->node_num); - - /* got the EX lock. check to see if another node + + /* got the EX lock. check to see if another node * just became the reco master */ if (dlm_reco_master_ready(dlm)) { mlog(0, "%s: got reco EX lock, but %u will " @@ -2451,12 +2451,12 @@ again: /* see if recovery was already finished elsewhere */ spin_lock(&dlm->spinlock); if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { - status = -EINVAL; + status = -EINVAL; mlog(0, "%s: got reco EX lock, but " "node got recovered already\n", dlm->name); if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { mlog(ML_ERROR, "%s: new master is %u " - "but no dead node!\n", + "but no dead node!\n", dlm->name, dlm->reco.new_master); BUG(); } @@ -2468,7 +2468,7 @@ again: * set the master and send the messages to begin recovery */ if (!status) { mlog(0, "%s: dead=%u, this=%u, sending " - "begin_reco now\n", dlm->name, + "begin_reco now\n", dlm->name, dlm->reco.dead_node, dlm->node_num); status = dlm_send_begin_reco_message(dlm, dlm->reco.dead_node); @@ -2501,7 +2501,7 @@ again: mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", dlm->name, dlm->node_num); /* another node is master. 
wait on - * reco.new_master != O2NM_INVALID_NODE_NUM + * reco.new_master != O2NM_INVALID_NODE_NUM * for at most one second */ wait_event_timeout(dlm->dlm_reco_thread_wq, dlm_reco_master_ready(dlm), @@ -2599,7 +2599,7 @@ retry: } if (ret < 0) { struct dlm_lock_resource *res; - /* this is now a serious problem, possibly ENOMEM + /* this is now a serious problem, possibly ENOMEM * in the network stack. must retry */ mlog_errno(ret); mlog(ML_ERROR, "begin reco of dlm %s to node %u " @@ -2612,7 +2612,7 @@ retry: } else { mlog(ML_ERROR, "recovery lock not found\n"); } - /* sleep for a bit in hopes that we can avoid + /* sleep for a bit in hopes that we can avoid * another ENOMEM */ msleep(100); goto retry; @@ -2664,7 +2664,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, } if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { mlog(ML_NOTICE, "%s: dead_node previously set to %u, " - "node %u changing it to %u\n", dlm->name, + "node %u changing it to %u\n", dlm->name, dlm->reco.dead_node, br->node_idx, br->dead_node); } dlm_set_reco_master(dlm, br->node_idx); @@ -2730,8 +2730,8 @@ stage2: if (ret < 0) { mlog_errno(ret); if (dlm_is_host_down(ret)) { - /* this has no effect on this recovery - * session, so set the status to zero to + /* this has no effect on this recovery + * session, so set the status to zero to * finish out the last recovery */ mlog(ML_ERROR, "node %u went down after this " "node finished recovery.\n", nodenum); @@ -2768,7 +2768,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, mlog(0, "%s: node %u finalizing recovery stage%d of " "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); - + spin_lock(&dlm->spinlock); if (dlm->reco.new_master != fr->node_idx) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 00f53b2aea76..49e29ecd0201 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -190,8 +190,8 @@ 
static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, actions &= ~(DLM_UNLOCK_REMOVE_LOCK| DLM_UNLOCK_REGRANT_LOCK| DLM_UNLOCK_CLEAR_CONVERT_TYPE); - } else if (status == DLM_RECOVERING || - status == DLM_MIGRATING || + } else if (status == DLM_RECOVERING || + status == DLM_MIGRATING || status == DLM_FORWARD) { /* must clear the actions because this unlock * is about to be retried. cannot free or do @@ -661,14 +661,14 @@ retry: if (call_ast) { mlog(0, "calling unlockast(%p, %d)\n", data, status); if (is_master) { - /* it is possible that there is one last bast + /* it is possible that there is one last bast * pending. make sure it is flushed, then * call the unlockast. * not an issue if this is a mastered remotely, * since this lock has been removed from the * lockres queues and cannot be found. */ dlm_kick_thread(dlm, NULL); - wait_event(dlm->ast_wq, + wait_event(dlm->ast_wq, dlm_lock_basts_flushed(dlm, lock)); } (*unlockast)(data, status); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index c5e4a49e3a12..172f4c6ce1be 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3155,7 +3155,7 @@ out: /* Mark the lockres as being dropped. It will no longer be * queued if blocking, but we still may have to wait on it * being dequeued from the downconvert thread before we can consider - * it safe to drop. + * it safe to drop. * * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 15713cbb865c..19ad145d2af3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", (unsigned long long)blkno, generation); } - + *max_len = len; bail: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 06ccf6a86d35..65e9375d2fb3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode, int ret; offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - /* ugh. in prepare/commit_write, if from==to==start of block, we + /* ugh. in prepare/commit_write, if from==to==start of block, we ** skip the prepare. make sure we never send an offset for the start ** of a block */ @@ -1779,7 +1779,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, struct inode *inode = dentry->d_inode; loff_t saved_pos, end; - /* + /* * We start with a read level meta lock and only jump to an ex * if we need to make modifications here. */ @@ -2033,7 +2033,7 @@ out_dio: pos + count - 1); } - /* + /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that * it can unlock our rw lock. (it's the clustered equivalent of @@ -2198,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, goto bail; } - /* + /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ @@ -2220,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * We're fine letting folks race truncates and extending * writes with read across the cluster, just like they can * locally. Hence no rw_lock during read. - * + * * Take and drop the meta data lock to update inode fields * like i_size. 
This allows the checks down below - * generic_file_aio_read() a chance of actually working. + * generic_file_aio_read() a chance of actually working. */ ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); if (ret < 0) { @@ -2248,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, bail: if (have_alloc_sem) up_read(&inode->i_alloc_sem); - if (rw_level != -1) + if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mlog_exit(ret); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0297fb8982b8..88459bdd1ff3 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { status = ocfs2_try_open_lock(inode, 0); if (status) { - make_bad_inode(inode); + make_bad_inode(inode); return status; } } @@ -684,7 +684,7 @@ bail: return status; } -/* +/* * Serialize with orphan dir recovery. If the process doing * recovery on this orphan dir does an iget() with the dir * i_mutex held, we'll deadlock here. 
Instead we detect this diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index bf34c491ae96..9336c60e3a36 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, status = -ENOENT; mlog_errno(status); return status; - } + } mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 26069917a9f5..755cd49a5ef3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1062,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) "file system, but write access is " "unavailable.\n"); else - mlog_errno(status); + mlog_errno(status); goto read_super_error; } diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index c61369342a27..a0a120e82b97 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci, } /* Warning: even if it returns true, this does *not* guarantee that - * the block is stored in our inode metadata cache. - * + * the block is stored in our inode metadata cache. + * * This can be called under lock_buffer() */ int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, From 71656fa6ec10473eb9b646c10a2173fdea2f83c9 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:39 -0800 Subject: [PATCH 101/640] ocfs2/dlm: Ignore LVBs of locks in the Blocked list During lock resource migration, o2dlm fills the packet with a LVB from the first valid lock. For sanity, it ensures that the other valid locks have the same LVB. If not, it BUGs. The valid locks are ones that have granted EX or PR lock levels and are either on the Granted or Converting lists. Locks in the Blocked list cannot have a valid LVB. This patch ensures that we skip the locks in the Blocked list. 
Fixes oss bugzilla#1202 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1202 Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 48 +++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 57736d3ea7b5..9d67894cda6d 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, mres->master = master; } +static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock, + struct dlm_migratable_lockres *mres, + int queue) +{ + if (!lock->lksb) + return; + + /* Ignore lvb in all locks in the blocked list */ + if (queue == DLM_BLOCKED_LIST) + return; + + /* Only consider lvbs in locks with granted EX or PR lock levels */ + if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE) + return; + + if (dlm_lvb_is_empty(mres->lvb)) { + memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); + return; + } + + /* Ensure the lvb copied for migration matches in other valid locks */ + if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN)) + return; + + mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, " + "node=%u\n", + dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), + lock->lockres->lockname.len, lock->lockres->lockname.name, + lock->ml.node); + dlm_print_one_lock_resource(lock->lockres); + BUG(); +} /* returns 1 if this lock fills the network structure, * 0 otherwise */ @@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock, ml->list = queue; if (lock->lksb) { ml->flags = lock->lksb->flags; - /* send our current lvb */ - if (ml->type == LKM_EXMODE || - ml->type == LKM_PRMODE) { - /* if it is already set, this had better be a PR - * and it has to match */ - if (!dlm_lvb_is_empty(mres->lvb) && - (ml->type == LKM_EXMODE || - 
memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { - mlog(ML_ERROR, "mismatched lvbs!\n"); - dlm_print_one_lock_resource(lock->lockres); - BUG(); - } - memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); - } + dlm_prepare_lvb_for_migration(lock, mres, queue); } ml->node = lock->ml.node; mres->num_locks++; From 26636bf6b2010aa84c54d577231e017ba71493d0 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:40 -0800 Subject: [PATCH 102/640] ocfs2/dlm: Print more messages during lock migration When a lock resource is migrated, the dlm compares the migrated locks with those already existing on the new node. If the comparison fails, it BUGs. This patch prints more messages when the comparison fails in order to help with the root cause analysis. http://oss.oracle.com/bugzilla/show_bug.cgi?id=1206 This does not fix bz1206. However, if we run into it again, we will have more information to chew on. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 46 +++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9d67894cda6d..cfb2ae9ab538 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1750,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_lock *lock = NULL; u8 from = O2NM_MAX_NODES; unsigned int added = 0; + __be64 c; mlog(0, "running %d locks for this lockres\n", mres->num_locks); for (i=0; i<mres->num_locks; i++) { @@ -1797,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* lock is always created locally first, and * destroyed locally last.
it must be on the list */ if (!lock) { - __be64 c = ml->cookie; - mlog(ML_ERROR, "could not find local lock " - "with cookie %u:%llu!\n", + c = ml->cookie; + mlog(ML_ERROR, "Could not find local lock " + "with cookie %u:%llu, node %u, " + "list %u, flags 0x%x, type %d, " + "conv %d, highest blocked %d\n", dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c))); + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + ml->node, ml->list, ml->flags, ml->type, + ml->convert_type, ml->highest_blocked); + __dlm_print_one_lock_resource(res); + BUG(); + } + + if (lock->ml.node != ml->node) { + c = lock->ml.cookie; + mlog(ML_ERROR, "Mismatched node# in lock " + "cookie %u:%llu, name %.*s, node %u\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + res->lockname.len, res->lockname.name, + lock->ml.node); + c = ml->cookie; + mlog(ML_ERROR, "Migrate lock cookie %u:%llu, " + "node %u, list %u, flags 0x%x, type %d, " + "conv %d, highest blocked %d\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + ml->node, ml->list, ml->flags, ml->type, + ml->convert_type, ml->highest_blocked); __dlm_print_one_lock_resource(res); BUG(); } - BUG_ON(lock->ml.node != ml->node); if (tmpq != queue) { - mlog(0, "lock was on %u instead of %u for %.*s\n", - j, ml->list, res->lockname.len, res->lockname.name); + c = ml->cookie; + mlog(0, "Lock cookie %u:%llu was on list %u " + "instead of list %u for %.*s\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + j, ml->list, res->lockname.len, + res->lockname.name); + __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); continue; } @@ -1906,7 +1936,7 @@ skip_lvb: spin_lock(&res->spinlock); list_for_each_entry(lock, queue, list) { if (lock->ml.cookie == ml->cookie) { - __be64 c = lock->ml.cookie; + c = lock->ml.cookie; mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " "exists on this lockres!\n", dlm->name, 
res->lockname.len, res->lockname.name, From de3f440f8c9922afe8770dd16c84f1c87d779b59 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 14 Jan 2010 13:18:02 -0800 Subject: [PATCH 103/640] drm/i915: handle non-flip pending case when unpinning the scanout buffer The first page flip queued will replace the current front buffer, which should have a 0 pending flip count. So at finish time we need to handle that case (i.e. if the flip count is 0 *or* dec_and_test is 0 we need to wake the waiters). Also fix up an error path in the queue function and add some debug output (only enabled with driver debugging). Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2cc489b6629e..5f14dfbf715c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3985,6 +3985,12 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe) spin_lock_irqsave(&dev->event_lock, flags); work = intel_crtc->unpin_work; if (work == NULL || !work->pending) { + if (work && !work->pending) { + obj_priv = work->obj->driver_private; + DRM_DEBUG_DRIVER("flip finish: %p (%d) not pending?\n", + obj_priv, + atomic_read(&obj_priv->pending_flip)); + } spin_unlock_irqrestore(&dev->event_lock, flags); return; } @@ -4006,7 +4012,10 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev->event_lock, flags); obj_priv = work->obj->driver_private; - if (atomic_dec_and_test(&obj_priv->pending_flip)) + + /* Initial scanout buffer will have a 0 pending flip count */ + if ((atomic_read(&obj_priv->pending_flip) == 0) || + atomic_dec_and_test(&obj_priv->pending_flip)) DRM_WAKEUP(&dev_priv->pending_flip_queue); schedule_work(&work->work); } @@ -4019,8 +4028,11 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane) 
unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); - if (intel_crtc->unpin_work) + if (intel_crtc->unpin_work) { intel_crtc->unpin_work->pending = 1; + } else { + DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n"); + } spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -4054,6 +4066,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, /* We borrow the event spin lock for protecting unpin_work */ spin_lock_irqsave(&dev->event_lock, flags); if (intel_crtc->unpin_work) { + DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); spin_unlock_irqrestore(&dev->event_lock, flags); kfree(work); mutex_unlock(&dev->struct_mutex); @@ -4067,7 +4080,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, ret = intel_pin_and_fence_fb_obj(dev, obj); if (ret != 0) { + DRM_DEBUG_DRIVER("flip queue: %p pin & fence failed\n", + obj->driver_private); kfree(work); + intel_crtc->unpin_work = NULL; mutex_unlock(&dev->struct_mutex); return ret; } From 9611a9b6f6de95c290efc697a3e1d0530878c047 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 22 Jan 2010 08:46:13 +0100 Subject: [PATCH 104/640] i.MX25: Allow secondary clocks in DEFINE_CLOCK Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 6e838b857712..fbe42d258190 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -144,7 +144,7 @@ static void clk_cgcr_disable(struct clk *clk) __raw_writel(reg, clk->enable_reg); } -#define DEFINE_CLOCK(name, i, er, es, gr, sr) \ +#define DEFINE_CLOCK(name, i, er, es, gr, sr, s) \ static struct clk name = { \ .id = i, \ .enable_reg = CRM_BASE + er, \ @@ -153,27 +153,28 @@ static void clk_cgcr_disable(struct clk *clk) .set_rate = sr, \ .enable = clk_cgcr_enable, \ .disable = clk_cgcr_disable, \ + .secondary = s, \ } -DEFINE_CLOCK(gpt_clk, 0, CCM_CGCR0, 5, get_rate_ipg, 
NULL); -DEFINE_CLOCK(cspi1_clk, 0, CCM_CGCR1, 5, get_rate_ipg, NULL); -DEFINE_CLOCK(cspi2_clk, 0, CCM_CGCR1, 6, get_rate_ipg, NULL); -DEFINE_CLOCK(cspi3_clk, 0, CCM_CGCR1, 7, get_rate_ipg, NULL); -DEFINE_CLOCK(uart1_clk, 0, CCM_CGCR2, 14, get_rate_uart, NULL); -DEFINE_CLOCK(uart2_clk, 0, CCM_CGCR2, 15, get_rate_uart, NULL); -DEFINE_CLOCK(uart3_clk, 0, CCM_CGCR2, 16, get_rate_uart, NULL); -DEFINE_CLOCK(uart4_clk, 0, CCM_CGCR2, 17, get_rate_uart, NULL); -DEFINE_CLOCK(uart5_clk, 0, CCM_CGCR2, 18, get_rate_uart, NULL); -DEFINE_CLOCK(nfc_clk, 0, CCM_CGCR0, 8, get_rate_nfc, NULL); -DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL); -DEFINE_CLOCK(pwm1_clk, 0, CCM_CGCR1, 31, get_rate_ipg, NULL); -DEFINE_CLOCK(pwm2_clk, 0, CCM_CGCR2, 0, get_rate_ipg, NULL); -DEFINE_CLOCK(pwm3_clk, 0, CCM_CGCR2, 1, get_rate_ipg, NULL); -DEFINE_CLOCK(pwm4_clk, 0, CCM_CGCR2, 2, get_rate_ipg, NULL); -DEFINE_CLOCK(kpp_clk, 0, CCM_CGCR1, 28, get_rate_ipg, NULL); -DEFINE_CLOCK(tsc_clk, 0, CCM_CGCR2, 13, get_rate_ipg, NULL); -DEFINE_CLOCK(i2c_clk, 0, CCM_CGCR0, 6, get_rate_i2c, NULL); -DEFINE_CLOCK(fec_clk, 0, CCM_CGCR0, 23, get_rate_ipg, NULL); +DEFINE_CLOCK(gpt_clk, 0, CCM_CGCR0, 5, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(cspi1_clk, 0, CCM_CGCR1, 5, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(cspi2_clk, 0, CCM_CGCR1, 6, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(cspi3_clk, 0, CCM_CGCR1, 7, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(uart1_clk, 0, CCM_CGCR2, 14, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(uart2_clk, 0, CCM_CGCR2, 15, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(uart3_clk, 0, CCM_CGCR2, 16, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(uart4_clk, 0, CCM_CGCR2, 17, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(uart5_clk, 0, CCM_CGCR2, 18, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(nfc_clk, 0, CCM_CGCR0, 8, get_rate_nfc, NULL, NULL); +DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL, NULL); +DEFINE_CLOCK(pwm1_clk, 0, CCM_CGCR1, 31, get_rate_ipg, NULL, NULL); 
+DEFINE_CLOCK(pwm2_clk, 0, CCM_CGCR2, 0, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(pwm3_clk, 0, CCM_CGCR2, 1, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(pwm4_clk, 0, CCM_CGCR2, 2, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(kpp_clk, 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(tsc_clk, 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(i2c_clk, 0, CCM_CGCR0, 6, get_rate_i2c, NULL, NULL); +DEFINE_CLOCK(fec_clk, 0, CCM_CGCR0, 23, get_rate_ipg, NULL, NULL); #define _REGISTER_CLOCK(d, n, c) \ { \ From 4cd3f96cd4014419a4ea524d840be0fa39e3ddbc Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 22 Jan 2010 08:47:06 +0100 Subject: [PATCH 105/640] i.MX25: implement secondary clocks for uarts and fec The uarts and fec each need two clocks; implement this using the secondary clock field in struct clk. Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index fbe42d258190..3be51dd46c67 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -157,14 +157,16 @@ static void clk_cgcr_disable(struct clk *clk) } DEFINE_CLOCK(gpt_clk, 0, CCM_CGCR0, 5, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL); DEFINE_CLOCK(cspi1_clk, 0, CCM_CGCR1, 5, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(cspi2_clk, 0, CCM_CGCR1, 6, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(cspi3_clk, 0, CCM_CGCR1, 7, get_rate_ipg, NULL, NULL); -DEFINE_CLOCK(uart1_clk, 0, CCM_CGCR2, 14, get_rate_uart, NULL, NULL); -DEFINE_CLOCK(uart2_clk, 0, CCM_CGCR2, 15, get_rate_uart, NULL, NULL); -DEFINE_CLOCK(uart3_clk, 0, CCM_CGCR2, 16, get_rate_uart, NULL, NULL); -DEFINE_CLOCK(uart4_clk, 0, CCM_CGCR2, 17, get_rate_uart, NULL, NULL); -DEFINE_CLOCK(uart5_clk, 0, CCM_CGCR2, 18, get_rate_uart, NULL, NULL); +DEFINE_CLOCK(fec_ipg_clk, 0, CCM_CGCR1, 15, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(uart1_clk, 0, CCM_CGCR2,
14, get_rate_uart, NULL, &uart_per_clk); +DEFINE_CLOCK(uart2_clk, 0, CCM_CGCR2, 15, get_rate_uart, NULL, &uart_per_clk); +DEFINE_CLOCK(uart3_clk, 0, CCM_CGCR2, 16, get_rate_uart, NULL, &uart_per_clk); +DEFINE_CLOCK(uart4_clk, 0, CCM_CGCR2, 17, get_rate_uart, NULL, &uart_per_clk); +DEFINE_CLOCK(uart5_clk, 0, CCM_CGCR2, 18, get_rate_uart, NULL, &uart_per_clk); DEFINE_CLOCK(nfc_clk, 0, CCM_CGCR0, 8, get_rate_nfc, NULL, NULL); DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL, NULL); DEFINE_CLOCK(pwm1_clk, 0, CCM_CGCR1, 31, get_rate_ipg, NULL, NULL); @@ -174,7 +176,7 @@ DEFINE_CLOCK(pwm4_clk, 0, CCM_CGCR2, 2, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(kpp_clk, 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(tsc_clk, 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(i2c_clk, 0, CCM_CGCR0, 6, get_rate_i2c, NULL, NULL); -DEFINE_CLOCK(fec_clk, 0, CCM_CGCR0, 23, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(fec_clk, 0, CCM_CGCR0, 23, get_rate_ipg, NULL, &fec_ipg_clk); #define _REGISTER_CLOCK(d, n, c) \ { \ From fadc095622dd188cae88eb2f3ff28fd6e9d2d2f1 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Jan 2010 12:58:19 +0200 Subject: [PATCH 106/640] mx25: remove unused mx25_clocks_init() argument The fref is needless on mx25 since the reference clock is fixed at 24MHz. 
Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 2 +- arch/arm/mach-mx25/mx25pdk.c | 2 +- arch/arm/plat-mxc/include/mach/common.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 3be51dd46c67..abd303bc3bf2 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -211,7 +211,7 @@ static struct clk_lookup lookups[] = { _REGISTER_CLOCK("fec.0", NULL, fec_clk) }; -int __init mx25_clocks_init(unsigned long fref) +int __init mx25_clocks_init(void) { int i; diff --git a/arch/arm/mach-mx25/mx25pdk.c b/arch/arm/mach-mx25/mx25pdk.c index 921bc99ea231..6f06089246eb 100644 --- a/arch/arm/mach-mx25/mx25pdk.c +++ b/arch/arm/mach-mx25/mx25pdk.c @@ -91,7 +91,7 @@ static void __init mx25pdk_init(void) static void __init mx25pdk_timer_init(void) { - mx25_clocks_init(26000000); + mx25_clocks_init(); } static struct sys_timer mx25pdk_timer = { diff --git a/arch/arm/plat-mxc/include/mach/common.h b/arch/arm/plat-mxc/include/mach/common.h index 286cb9b0a25b..4bf1068ffad9 100644 --- a/arch/arm/plat-mxc/include/mach/common.h +++ b/arch/arm/plat-mxc/include/mach/common.h @@ -32,7 +32,7 @@ extern void mxc91231_init_irq(void); extern void mxc_timer_init(struct clk *timer_clk, void __iomem *, int); extern int mx1_clocks_init(unsigned long fref); extern int mx21_clocks_init(unsigned long lref, unsigned long fref); -extern int mx25_clocks_init(unsigned long fref); +extern int mx25_clocks_init(void); extern int mx27_clocks_init(unsigned long fref); extern int mx31_clocks_init(unsigned long fref); extern int mx35_clocks_init(void); From 828df43f139c7fbf0d505c7b9a666d321a0f2c25 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Jan 2010 12:58:20 +0200 Subject: [PATCH 107/640] mx25: properly initialize clocks This patch disables all unnecessary clocks in mx25_clocks_init() to make a clean start, the same as is being done for the rest of the i.MX chips.
This patch was tested on i.MX25 PDK. Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index abd303bc3bf2..08aaa38f1f1f 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -218,6 +218,14 @@ int __init mx25_clocks_init(void) for (i = 0; i < ARRAY_SIZE(lookups); i++) clkdev_add(&lookups[i]); + /* Turn off all clocks except the ones we need to survive, namely: + * EMI, GPIO1-3 (CCM_CGCR1[18:16]), GPT1, IOMUXC (CCM_CGCR1[27]), IIM, + * SCC + */ + __raw_writel((1 << 19), CRM_BASE + CCM_CGCR0); + __raw_writel((0xf << 16) | (3 << 26), CRM_BASE + CCM_CGCR1); + __raw_writel((1 << 5), CRM_BASE + CCM_CGCR2); + mxc_timer_init(&gpt_clk, MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54); return 0; From faed40665d2d81b7e0e537d14ef680ab3da9f22d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Jan 2010 12:58:21 +0200 Subject: [PATCH 108/640] mx25: fix time accounting The gpt_clk rate function doesn't consider the PER divider. This causes a significant drift in time accounting. Fix this by introducing the correct rate calculation function. 
Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index 08aaa38f1f1f..c003ac40f9a8 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -119,6 +119,11 @@ static unsigned long get_rate_nfc(struct clk *clk) return get_rate_per(8); } +static unsigned long get_rate_gpt(struct clk *clk) +{ + return get_rate_per(5); +} + static unsigned long get_rate_otg(struct clk *clk) { return 48000000; /* FIXME */ @@ -156,7 +161,7 @@ static void clk_cgcr_disable(struct clk *clk) .secondary = s, \ } -DEFINE_CLOCK(gpt_clk, 0, CCM_CGCR0, 5, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(gpt_clk, 0, CCM_CGCR0, 5, get_rate_gpt, NULL, NULL); DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL); DEFINE_CLOCK(cspi1_clk, 0, CCM_CGCR1, 5, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(cspi2_clk, 0, CCM_CGCR1, 6, get_rate_ipg, NULL, NULL); From 1c5740237428ca025a30f53c5615edd11201c17b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 25 Jan 2010 12:58:22 +0200 Subject: [PATCH 109/640] mx25: make the FEC AHB clk secondary of the IPG This makes the FEC clock configuration consistent with the UART one. 
Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer --- arch/arm/mach-mx25/clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c index c003ac40f9a8..6acc88bcdc40 100644 --- a/arch/arm/mach-mx25/clock.c +++ b/arch/arm/mach-mx25/clock.c @@ -166,7 +166,7 @@ DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL); DEFINE_CLOCK(cspi1_clk, 0, CCM_CGCR1, 5, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(cspi2_clk, 0, CCM_CGCR1, 6, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(cspi3_clk, 0, CCM_CGCR1, 7, get_rate_ipg, NULL, NULL); -DEFINE_CLOCK(fec_ipg_clk, 0, CCM_CGCR1, 15, get_rate_ipg, NULL, NULL); +DEFINE_CLOCK(fec_ahb_clk, 0, CCM_CGCR0, 23, NULL, NULL, NULL); DEFINE_CLOCK(uart1_clk, 0, CCM_CGCR2, 14, get_rate_uart, NULL, &uart_per_clk); DEFINE_CLOCK(uart2_clk, 0, CCM_CGCR2, 15, get_rate_uart, NULL, &uart_per_clk); DEFINE_CLOCK(uart3_clk, 0, CCM_CGCR2, 16, get_rate_uart, NULL, &uart_per_clk); @@ -181,7 +181,7 @@ DEFINE_CLOCK(pwm4_clk, 0, CCM_CGCR2, 2, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(kpp_clk, 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(tsc_clk, 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL); DEFINE_CLOCK(i2c_clk, 0, CCM_CGCR0, 6, get_rate_i2c, NULL, NULL); -DEFINE_CLOCK(fec_clk, 0, CCM_CGCR0, 23, get_rate_ipg, NULL, &fec_ipg_clk); +DEFINE_CLOCK(fec_clk, 0, CCM_CGCR1, 15, get_rate_ipg, NULL, &fec_ahb_clk); #define _REGISTER_CLOCK(d, n, c) \ { \ From c9edda7140ec6a22accf7f2f86da362dfbfd41fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:41:34 -0500 Subject: [PATCH 110/640] NFS: Fix a reference leak in nfs_wb_cancel_page() Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d171696017f4..dac8d7676aff 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, 
struct page *page) break; } ret = nfs_wait_on_request(req); + nfs_release_request(req); if (ret < 0) goto out; } From 82be934a59ff891cac598727e5a862ba2b9d1fac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:41:53 -0500 Subject: [PATCH 111/640] NFS: Try to commit unstable writes in nfs_release_page() If someone calls nfs_release_page(), we presumably already know that the page is clean, however it may be holding an unstable write. Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b891328f332..63f2071d6445 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + if (gfp & __GFP_WAIT) + nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; From 0aa05887af728b058af91197f0ae9b3ae63dd74a Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 26 Jan 2010 15:42:03 -0500 Subject: [PATCH 112/640] NFS: Make nfs_commitdata_release static The symbol nfs_commitdata_release is only used locally in this file. Make it static to prevent the following sparse warning: warning: symbol 'nfs_commitdata_release' was not declared. Should it be static? 
Signed-off-by: H Hartley Sweeten Cc: Trond Myklebust Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dac8d7676aff..7b54b8bb101f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commitdata_release(void *data) +static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; From b0706ca415b188ed58788420de4d5c9972b2afb2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 26 Jan 2010 15:42:11 -0500 Subject: [PATCH 113/640] NFS: Avoid warnings when CONFIG_NFS_V4=n Avoid the following warnings when CONFIG_NFS_V4=n: fs/nfs/sysctl.c:19: warning: unused variable `nfs_set_port_max' fs/nfs/sysctl.c:18: warning: unused variable `nfs_set_port_min' by making those variables contingent on NFSv4 being configured. Signed-off-by: David Howells Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/sysctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 70e1fbbaaeab..ad4d2e787b20 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -15,8 +15,10 @@ #include "callback.h" +#ifdef CONFIG_NFS_V4 static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; +#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { From 2bee72a6aa1e6d0a4f5da56217f0d0bbbdd0d9a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:21 -0500 Subject: [PATCH 114/640] NFSv4: Ensure that the NFSv4 locking can recover from stateid errors In most cases, we just want to mark the lock_stateid sequence id as being uninitialised. 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/nfs4proc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 198d51d17c13..0b68238ed0c8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4088,6 +4088,22 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = { .rpc_release = nfs4_lock_release, }; +static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = lsp->ls_state; + + switch (error) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_nograce(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + }; +} + static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) { struct nfs4_lockdata *data; @@ -4126,6 +4142,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; + if (ret) + nfs4_handle_setlk_error(data->server, data->lsp, + data->arg.new_lock_owner, ret); } else data->cancelled = 1; rpc_put_task(task); From 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:30 -0500 Subject: [PATCH 115/640] NFSv4: Don't allow posix locking against servers that don't support it Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 865265bdca03..ea2f41b26aea 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -146,6 +146,7 @@ enum { NFS_O_RDWR_STATE, /* OPEN stateid has 
read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ + NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ }; struct nfs4_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b68238ed0c8..be044b58e811 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1658,6 +1658,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; + if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; @@ -4200,8 +4202,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock { struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; - int status; + int status = -ENOLCK; + if ((fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + goto out; /* Is this a delegated open? */ status = nfs4_set_lock_state(state, request); if (status != 0) From 03391693a95900875b0973569d2d73ff3aa8972e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:38 -0500 Subject: [PATCH 116/640] NFSv4.1: Don't call nfs4_schedule_state_recovery() unnecessarily Currently, nfs4_handle_exception() will call it twice if called with an error of -NFS4ERR_STALE_CLIENTID, -NFS4ERR_STALE_STATEID or -NFS4ERR_EXPIRED. 
Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be044b58e811..afbfe673489b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -256,12 +256,8 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, ret = nfs4_wait_clnt_recover(clp); if (ret == 0) exception->retry = 1; -#if !defined(CONFIG_NFS_V4_1) break; -#else /* !defined(CONFIG_NFS_V4_1) */ - if (!nfs4_has_session(server->nfs_client)) - break; - /* FALLTHROUGH */ +#if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -274,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, nfs4_schedule_state_recovery(clp); exception->retry = 1; break; -#endif /* !defined(CONFIG_NFS_V4_1) */ +#endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { /* We have retried a decent amount, time to From a2c0b9e291208f65221a0ad8a0c80a377707d480 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:47 -0500 Subject: [PATCH 117/640] NFS: Ensure that we handle NFS4ERR_STALE_STATEID correctly Even if the server is crazy, we should be able to mark the stateid as being bad, to ensure it gets recovered. 
Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 44 +++++++++++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 2 +- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ea2f41b26aea..0c6fda33d66e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -278,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); +extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index afbfe673489b..375f0fae2c6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -249,14 +249,14 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(clp); - ret = nfs4_wait_clnt_recover(clp); - if (ret == 0) - exception->retry = 1; - break; + goto do_state_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -289,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } /* We failed to handle the error */ return nfs4_map_errors(ret); +do_state_recovery: + nfs4_schedule_state_recovery(clp); + ret = 
nfs4_wait_clnt_recover(clp); + if (ret == 0) + exception->retry = 1; + return ret; } @@ -3420,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - task->tk_status = 0; - return -EAGAIN; + goto do_state_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -3456,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } task->tk_status = nfs4_map_errors(task->tk_status); return 0; +do_state_recovery: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + task->tk_status = 0; + return -EAGAIN; } static int @@ -4099,6 +4111,12 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) nfs4_state_mark_reclaim_nograce(clp, state); lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + break; + case -NFS4ERR_STALE_STATEID: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_reboot(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; }; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6d263ed79e92..c1e2733f4fa4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -901,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -static int 
nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); From abd50713944c8ea9e0af5b7bffa0aacae21cc91a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jan 2010 18:50:16 +0100 Subject: [PATCH 118/640] perf: Reimplement frequency driven sampling There was a bug in the old period code that caused intel_pmu_enable_all() or native_write_msr_safe() to show up quite high in the profiles. In staring at that code it made my head hurt, so I rewrote it in a hopefully simpler fashion. It's now fully symmetric between tick and overflow driven adjustments and uses less data to boot. The only complication is that it basically wants to do a u128 division. The code approximates that in a rather simple truncate until it fits fashion, taking care to balance the terms while truncating. This version does not generate that sampling artefact.
Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +- kernel/perf_event.c | 132 ++++++++++++++++++++++++++----------- 2 files changed, 94 insertions(+), 43 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c6f812e4d058..72b2615600d8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -498,9 +498,8 @@ struct hw_perf_event { atomic64_t period_left; u64 interrupts; - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; + u64 freq_time_stamp; + u64 freq_count_stamp; #endif }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index edc46b92b508..251fb9552492 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task) static void perf_log_throttle(struct perf_event *event, int enable); -static void perf_adjust_period(struct perf_event *event, u64 events) +static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) +{ + u64 frequency = event->attr.sample_freq; + u64 sec = NSEC_PER_SEC; + u64 divisor, dividend; + + int count_fls, nsec_fls, frequency_fls, sec_fls; + + count_fls = fls64(count); + nsec_fls = fls64(nsec); + frequency_fls = fls64(frequency); + sec_fls = 30; + + /* + * We got @count in @nsec, with a target of sample_freq HZ + * the target period becomes: + * + * @count * 10^9 + * period = ------------------- + * @nsec * sample_freq + * + */ + + /* + * Reduce accuracy by one bit such that @a and @b converge + * to a similar magnitude. + */ +#define REDUCE_FLS(a, b) \ +do { \ + if (a##_fls > b##_fls) { \ + a >>= 1; \ + a##_fls--; \ + } else { \ + b >>= 1; \ + b##_fls--; \ + } \ +} while (0) + + /* + * Reduce accuracy until either term fits in a u64, then proceed with + * the other, so that finally we can do a u64/u64 division. 
+ */ + while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { + REDUCE_FLS(nsec, frequency); + REDUCE_FLS(sec, count); + } + + if (count_fls + sec_fls > 64) { + divisor = nsec * frequency; + + while (count_fls + sec_fls > 64) { + REDUCE_FLS(count, sec); + divisor >>= 1; + } + + dividend = count * sec; + } else { + dividend = count * sec; + + while (nsec_fls + frequency_fls > 64) { + REDUCE_FLS(nsec, frequency); + dividend >>= 1; + } + + divisor = nsec * frequency; + } + + return div64_u64(dividend, divisor); +} + +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; u64 period, sample_period; s64 delta; - events *= hwc->sample_period; - period = div64_u64(events, event->attr.sample_freq); + period = perf_calculate_period(event, nsec, count); delta = (s64)(period - hwc->sample_period); delta = (delta + 7) / 8; /* low pass filter */ @@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) sample_period = 1; hwc->sample_period = sample_period; + + if (atomic64_read(&hwc->period_left) > 8*sample_period) { + perf_disable(); + event->pmu->disable(event); + atomic64_set(&hwc->period_left, 0); + event->pmu->enable(event); + perf_enable(); + } } static void perf_ctx_adjust_freq(struct perf_event_context *ctx) { struct perf_event *event; struct hw_perf_event *hwc; - u64 interrupts, freq; + u64 interrupts, now; + s64 delta; raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { @@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); event->pmu->unthrottle(event); - interrupts = 2*sysctl_perf_event_sample_rate/HZ; } if (!event->attr.freq || !event->attr.sample_freq) continue; - /* - * if the specified freq < HZ then we need to skip ticks - */ - if (event->attr.sample_freq < HZ) { - freq = event->attr.sample_freq; + 
event->pmu->read(event); + now = atomic64_read(&event->count); + delta = now - hwc->freq_count_stamp; + hwc->freq_count_stamp = now; - hwc->freq_count += freq; - hwc->freq_interrupts += interrupts; - - if (hwc->freq_count < HZ) - continue; - - interrupts = hwc->freq_interrupts; - hwc->freq_interrupts = 0; - hwc->freq_count -= HZ; - } else - freq = HZ; - - perf_adjust_period(event, freq * interrupts); - - /* - * In order to avoid being stalled by an (accidental) huge - * sample period, force reset the sample period if we didn't - * get any events in this freq period. - */ - if (!interrupts) { - perf_disable(); - event->pmu->disable(event); - atomic64_set(&hwc->period_left, 0); - event->pmu->enable(event); - perf_enable(); - } + if (delta > 0) + perf_adjust_period(event, TICK_NSEC, delta); } raw_spin_unlock(&ctx->lock); } @@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, if (event->attr.freq) { u64 now = perf_clock(); - s64 delta = now - hwc->freq_stamp; + s64 delta = now - hwc->freq_time_stamp; - hwc->freq_stamp = now; + hwc->freq_time_stamp = now; - if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(event, NSEC_PER_SEC / (int)delta); + if (delta > 0 && delta < 2*TICK_NSEC) + perf_adjust_period(event, delta, hwc->last_period); } /* From 24bfef0f924b4ac4312614422a4982b5f4d9a4c7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Jan 2010 13:04:43 -0200 Subject: [PATCH 119/640] perf top: Fix sample counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broken since "5b2bb75 perf top: Support userspace symbols too". 
Reported-by: Mike Galbraith Tested-by: Mike Galbraith Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264086284-1431-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7a8a77ec2c9d..8b049888a9dd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -934,8 +934,11 @@ static void event__process_sample(const event_t *self, struct addr_location al; u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + ++samples; + switch (origin) { case PERF_RECORD_MISC_USER: + ++userspace_samples; if (hide_user_symbols) return; break; @@ -960,9 +963,6 @@ static void event__process_sample(const event_t *self, if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); pthread_mutex_unlock(&active_symbols_lock); - if (origin == PERF_RECORD_MISC_USER) - ++userspace_samples; - ++samples; } } From 0f35cd4cea08a8893e3e2ea03cbdb65f5d2b0e7a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Jan 2010 13:04:44 -0200 Subject: [PATCH 120/640] perf top: Handle PERF_RECORD_{FORK,EXIT} events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noticed by Mike, symbols in new tasks were not being processed as we weren't processing these events. 
Reported-by: Mike Galbraith Tested-by: Mike Galbraith Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264086284-1431-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8b049888a9dd..2227b84aa002 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -975,6 +975,10 @@ static int event__process(event_t *event, struct perf_session *session) case PERF_RECORD_MMAP: event__process_mmap(event, session); break; + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: + event__process_task(event, session); + break; default: break; } From e1c7c6a40c8037478742ce134190c1a955853bfb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Jan 2010 14:35:01 -0200 Subject: [PATCH 121/640] perf symbols: Fix inverted logic for showing kallsyms as the source of symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only if we parsed /proc/kallsyms (or a copy found in the buildid cache) we should set the dso long name to "[kernel.kallsyms]". 
Reported-by: Mike Galbraith Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264178102-4203-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6f30fe18c265..1270cf867e61 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1671,7 +1671,7 @@ do_kallsyms: out_try_fixup: if (err > 0) { out_fixup: - if (kallsyms_filename == NULL) + if (kallsyms_filename != NULL) dso__set_long_name(self, strdup("[kernel.kallsyms]")); map__fixup_start(map); map__fixup_end(map); From 19fc2dedff448120a7aeaa3c136689c6b71777c6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Jan 2010 14:35:02 -0200 Subject: [PATCH 122/640] perf symbols: Use the right variable to check for kallsyms in the cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probably this wasn't noticed when testing this on my parisc machine because I must have copied manually to its cache the vmlinux file used in the x86_64 machine, now that I tried looking on a x86-32 machine with a fresh cache, kernel symbols weren't being resolved even with the right kallsyms copy on its cache, duh. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264178102-4203-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1270cf867e61..f1f609dcf9a1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1650,12 +1650,12 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, getenv("HOME"), sbuild_id) == -1) return -1; + kallsyms_filename = kallsyms_allocated_filename; + if (access(kallsyms_filename, F_OK)) { free(kallsyms_allocated_filename); return -1; } - - kallsyms_filename = kallsyms_allocated_filename; } else { /* * Last resort, if we don't have a build-id and couldn't find From 408f0d18ba6b9bb447f807f621b2c9663c5cf638 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 22 Jan 2010 22:45:29 +0900 Subject: [PATCH 123/640] perf trace: Add -i option for choosing input file perf trace lacks -i option for choosing input file. This patch adds it to perf trace. 
Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1264167929-6741-1-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- tools/perf/builtin-trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8e9cbfe608d6..0b65779e3c10 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -515,6 +515,8 @@ static const struct option options[] = { parse_scriptname), OPT_STRING('g', "gen-script", &generate_script_lang, "lang", "generate perf-trace.xx script in specified language"), + OPT_STRING('i', "input", &input_name, "file", + "input file name"), OPT_END() }; From 339ce1a4dc2ca26444c4f65c31b71a5056f3bb0b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 18 Jan 2010 16:47:07 +1100 Subject: [PATCH 124/640] perf: Fix inconsistency between IP and callchain sampling When running perf across all cpus with backtracing (-a -g), sometimes we get samples without associated backtraces: 23.44% init [kernel] [k] restore 11.46% init eeba0c [k] 0x00000000eeba0c 6.77% swapper [kernel] [k] .perf_ctx_adjust_freq 5.73% init [kernel] [k] .__trace_hcall_entry 4.69% perf libc-2.9.so [.] 0x0000000006bb8c | |--11.11%-- 0xfffa941bbbc It turns out the backtrace code has a check for the idle task and the IP sampling does not. This creates problems when profiling an interrupt heavy workload (in my case 10Gbit ethernet) since we get no backtraces for interrupts received while idle (ie most of the workload). Right now x86 and sh check that current is not NULL, which should never happen so remove that too. Idle task's exclusion must be performed from the core code, on top of perf_event_attr:exclude_idle. 
Signed-off-by: Anton Blanchard Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Benjamin Herrenschmidt Cc: Paul Mundt LKML-Reference: <20100118054707.GT12666@kryten> Signed-off-by: Frederic Weisbecker --- arch/powerpc/kernel/perf_callchain.c | 3 --- arch/sh/kernel/perf_callchain.c | 3 --- arch/x86/kernel/cpu/perf_event.c | 3 --- 3 files changed, 9 deletions(-) diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a3c11cac3d71..95ad9dad298e 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -495,9 +495,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry->nr = 0; - if (current->pid == 0) /* idle task? */ - return entry; - if (!user_mode(regs)) { perf_callchain_kernel(regs, entry); if (current->mm) diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 24ea837eac5b..a9dd3abde28e 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -68,9 +68,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) is_user = user_mode(regs); - if (!current || current->pid == 0) - return; - if (is_user && current->state != TASK_RUNNING) return; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b1bb8c550526..ed1998b28a7c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2425,9 +2425,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) is_user = user_mode(regs); - if (!current || current->pid == 0) - return; - if (is_user && current->state != TASK_RUNNING) return; From 430ad5a600a83956749307b13257c464c3826b55 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 28 Jan 2010 09:32:29 +0800 Subject: [PATCH 125/640] perf: Factorize trace events raw sample buffer operations Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to gather the common code that operates on raw 
events sampling buffer. This cleans up redundant code between regular trace events, syscall events and kprobe events. Changelog v1->v2: - Rename function name as per Masami and Frederic's suggestion - Add __kprobes for ftrace_perf_buf_prepare() and make ftrace_perf_buf_submit() inline as per Masami's suggestion - Export ftrace_perf_buf_prepare since modules will use it Signed-off-by: Xiao Guangrong Acked-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Steven Rostedt Cc: Paul Mackerras Cc: Jason Baron Cc: Peter Zijlstra LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 18 +++++-- include/trace/ftrace.h | 48 +++-------------- kernel/trace/trace_event_profile.c | 52 ++++++++++++++++-- kernel/trace/trace_kprobe.c | 86 ++++-------------------------- kernel/trace/trace_syscalls.c | 71 ++++-------------------- 5 files changed, 88 insertions(+), 187 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0a09e758c7d3..cd95919d9ff3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -5,6 +5,7 @@ #include #include #include +#include struct trace_array; struct tracer; @@ -138,9 +139,6 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *perf_trace_buf; -extern char *perf_trace_buf_nmi; - #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ @@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); +extern void * +ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, + unsigned long *irq_flags); + +static inline void +ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, + u64 count, unsigned long irq_flags) +{ + struct trace_entry *entry = raw_data; + + perf_tp_event(entry->type, addr, 
count, raw_data, size); + perf_swevent_put_recursion_context(rctx); + local_irq_restore(irq_flags); +} #endif #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4a46a60c2077..f2c09e4d656c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(void); \ - extern void perf_swevent_put_recursion_context(int rctx); \ - extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ - struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ - char *trace_buf; \ - char *raw_data; \ - int __cpu; \ int rctx; \ - int pc; \ - \ - pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ @@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ "profile buffer not large enough")) \ return; \ - \ - local_irq_save(irq_flags); \ - \ - rctx = perf_swevent_get_recursion_context(); \ - if (rctx < 0) \ - goto end_recursion; \ - \ - __cpu = smp_processor_id(); \ - \ - if (in_nmi()) \ - trace_buf = rcu_dereference(perf_trace_buf_nmi); \ - else \ - trace_buf = rcu_dereference(perf_trace_buf); \ - \ - if (!trace_buf) \ - goto end; \ - \ - raw_data = per_cpu_ptr(trace_buf, __cpu); \ - \ - *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ - entry = (struct ftrace_raw_##call *)raw_data; \ - ent = &entry->ent; \ - tracing_generic_entry_update(ent, irq_flags, pc); \ - ent->type = event_call->id; \ - \ + entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \ + __entry_size, event_call->id, &rctx, &irq_flags); \ + if (!entry) \ 
+ return; \ tstruct \ \ { assign; } \ \ - perf_tp_event(event_call->id, __addr, __count, entry, \ - __entry_size); \ - \ -end: \ - perf_swevent_put_recursion_context(rctx); \ -end_recursion: \ - local_irq_restore(irq_flags); \ + ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ + __count, irq_flags); \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 9e25573242cf..f0d693005075 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -6,14 +6,12 @@ */ #include +#include #include "trace.h" -char *perf_trace_buf; -EXPORT_SYMBOL_GPL(perf_trace_buf); - -char *perf_trace_buf_nmi; -EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +static char *perf_trace_buf; +static char *perf_trace_buf_nmi; typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; @@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) } mutex_unlock(&event_mutex); } + +__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, + int *rctxp, unsigned long *irq_flags) +{ + struct trace_entry *entry; + char *trace_buf, *raw_data; + int pc, cpu; + + pc = preempt_count(); + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(*irq_flags); + + *rctxp = perf_swevent_get_recursion_context(); + if (*rctxp < 0) + goto err_recursion; + + cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto err; + + raw_data = per_cpu_ptr(trace_buf, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + entry = (struct trace_entry *)raw_data; + tracing_generic_entry_update(entry, *irq_flags, pc); + entry->type = type; + + return raw_data; +err: + perf_swevent_put_recursion_context(*rctxp); +err_recursion: + local_irq_restore(*irq_flags); + return NULL; +} 
+EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d6266cad6953..2e28ee36646f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kprobe_trace_entry *)raw_data; - ent = &entry->ent; - - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + 
ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); return 0; } @@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kretprobe_trace_entry *)raw_data; - ent = &entry->ent; - - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, 
entry->ret_ip, 1, irq_flags); return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f694f66d75b0..4e332b9e449c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; unsigned long flags; - char *trace_buf; - char *raw_data; int syscall_nr; int rctx; int size; - int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) "profile buffer not large enough")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); + rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, + sys_data->enter_event->id, &rctx, &flags); + if (!rec) + return; - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysenter_enable(struct ftrace_event_call *call) @@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *rec; unsigned long flags; int syscall_nr; - char *trace_buf; - char 
*raw_data; int rctx; int size; - int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) "exit event has grown above profile buffer size")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); + rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, + sys_data->exit_event->id, &rctx, &flags); + if (!rec) + return; - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - - rec = (struct syscall_trace_exit *)raw_data; - - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysexit_enable(struct ftrace_event_call *call) From 1e12a4a7a3a78bc9c3aaf3486dde3b8ab1cdf465 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 28 Jan 2010 09:34:27 +0800 Subject: [PATCH 126/640] tracing/kprobe: Cleanup unused return value of tracing functions The return values of the kprobe's tracing functions are meaningless, lets remove these. 
Signed-off-by: Xiao Guangrong Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Jason Baron Cc: Peter Zijlstra LKML-Reference: <4B60E9A3.2040505@cn.fujitsu.com> [fweisbec@gmail: whitespace fixes, drop useless void returns in end of functions] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_kprobe.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2e28ee36646f..6178abf3637e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -942,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = { }; /* Kprobe handler */ -static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct kprobe_trace_entry *entry; @@ -962,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) event = trace_current_buffer_lock_reserve(&buffer, call->id, size, irq_flags, pc); if (!event) - return 0; + return; entry = ring_buffer_event_data(event); entry->nargs = tp->nr_args; @@ -972,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); - return 0; } /* Kretprobe handler */ -static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -995,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, event = trace_current_buffer_lock_reserve(&buffer, call->id, size, irq_flags, pc); if (!event) - return 0; + return; entry = 
ring_buffer_event_data(event); entry->nargs = tp->nr_args; @@ -1006,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); - - return 0; } /* Event entry printers */ @@ -1237,7 +1234,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes int kprobe_profile_func(struct kprobe *kp, +static __kprobes void kprobe_profile_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); @@ -1252,11 +1249,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, size -= sizeof(u32); if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "profile buffer not large enough")) - return 0; + return; entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) - return 0; + return; entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; @@ -1264,12 +1261,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); - - return 0; } /* Kretprobe profile handler */ -static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -1284,11 +1279,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, size -= sizeof(u32); if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "profile buffer not large enough")) - return 0; + return; entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) - return 0; + return; entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; @@ -1297,8 +1292,6 @@ static 
__kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); - - return 0; } static int probe_profile_enable(struct ftrace_event_call *call) From 40f9249a73f6c251adea492b1c3d19d39e2a9bda Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 28 Jan 2010 16:44:01 +0530 Subject: [PATCH 127/640] x86/debug: Clear reserved bits of DR6 in do_debug() Clear the reserved bits from the stored copy of debug status register (DR6). This will help easy bitwise operations such as quick testing of a debug event origin. Signed-off-by: K.Prasad Cc: Roland McGrath Cc: Jan Kiszka Cc: Alan Stern Cc: Ingo Molnar LKML-Reference: <20100128111401.GB13935@in.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/debugreg.h | 3 +++ arch/x86/kernel/traps.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 8240f76b531e..b81002f23614 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -14,6 +14,9 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ +/* Define reserved bits in DR6 which are always set to 1 */ +#define DR6_RESERVED (0xFFFF0FF0) + #define DR_TRAP0 (0x1) /* db0 */ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 33399176512a..1168e4454188 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) get_debugreg(dr6, 6); + /* Filter out all the reserved bits which are preset to 1 */ + dr6 &= ~DR6_RESERVED; + /* Catch kmemcheck conditions first of all! 
*/ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; From e0e53db6133c32964fd17f20b17073a402f07ed3 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 28 Jan 2010 16:44:15 +0530 Subject: [PATCH 128/640] x86/hw-breakpoints: Optimize return code from notifier chain in hw_breakpoint_handler Processing of debug exceptions in do_debug() can stop if it originated from a hw-breakpoint exception by returning NOTIFY_STOP in most cases. But for certain cases such as: a) user-space breakpoints with pending SIGTRAP signal delivery (as in the case of ptrace induced breakpoints). b) exceptions due to other causes than breakpoints We will continue to process the exception by returning NOTIFY_DONE. Signed-off-by: K.Prasad Cc: Ingo Molnar Cc: Roland McGrath Cc: Alan Stern Cc: Jan Kiszka LKML-Reference: <20100128111415.GC13935@in.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/hw_breakpoint.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 05d5fec64a94..ae90b4739435 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -502,8 +502,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); - if (bp) - rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -522,7 +520,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_unlock(); } - if (dr6 & (~DR_TRAP_BITS)) + /* + * Further processing in do_debug() is needed for a) user-space + * breakpoints (to generate signals) and b) when the system has + * taken exception due to multiple causes + */ + if ((current->thread.debugreg6 & DR_TRAP_BITS) || + (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; set_debugreg(dr7, 7); From 1da53e023029c067ba1277a33038c65d6e4c99b3 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Jan 2010 10:58:01 +0200 Subject: 
[PATCH 129/640] perf_events, x86: Improve x86 event scheduling This patch improves event scheduling by maximizing the use of PMU registers regardless of the order in which events are created in a group. The algorithm takes into account the list of counter constraints for each event. It assigns events to counters from the most constrained, i.e., works on only one counter, to the least constrained, i.e., works on any counter. Intel Fixed counter events and the BTS special event are also handled via this algorithm which is designed to be fairly generic. The patch also updates the validation of an event to use the scheduling algorithm. This will cause early failure in perf_event_open(). The 2nd version of this patch follows the model used by PPC, by running the scheduling algorithm and the actual assignment separately. Actual assignment takes place in hw_perf_enable() whereas scheduling is implemented in hw_perf_group_sched_in() and x86_pmu_enable(). Signed-off-by: Stephane Eranian [ fixup whitespace and style nits as well as adding is_x86_event() ] Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 16 +- arch/x86/kernel/cpu/perf_event.c | 771 +++++++++++++++++++++--------- 2 files changed, 572 insertions(+), 215 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f8548a870..dbc082685d52 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -26,7 +26,14 @@ /* * Includes eventsel and unit mask as well: */ -#define ARCH_PERFMON_EVENT_MASK 0xffff + + +#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL +#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL +#define INTEL_ARCH_EDGE_MASK 0x00040000ULL +#define INTEL_ARCH_INV_MASK 0x00800000ULL +#define INTEL_ARCH_CNT_MASK 0xFF000000ULL +#define 
INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) /* * filter mask to validate fixed counter events. @@ -37,7 +44,12 @@ * The other filters are supported by fixed counters. * The any-thread option is supported starting with v3. */ -#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 +#define INTEL_ARCH_FIXED_MASK \ + (INTEL_ARCH_CNT_MASK| \ + INTEL_ARCH_INV_MASK| \ + INTEL_ARCH_EDGE_MASK|\ + INTEL_ARCH_UNIT_MASK|\ + INTEL_ARCH_EVENT_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed1998b28a7c..995ac4ae379c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -7,6 +7,7 @@ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2009 Intel Corporation, + * Copyright (C) 2009 Google, Inc., Stephane Eranian * * For licencing details see kernel-base/COPYING */ @@ -68,26 +69,37 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) + +struct event_constraint { + u64 idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)]; + int code; + int cmask; +}; + struct cpu_hw_events { - struct perf_event *events[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; struct debug_store *ds; + + int n_events; + int n_added; + int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -struct event_constraint { - unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int code; -}; +#define EVENT_CONSTRAINT(c, n, m) { \ + .code = (c), \ + .cmask = (m), \ + .idxmsk[0] = (n) } 
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } -#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } +#define EVENT_CONSTRAINT_END \ + { .code = 0, .cmask = 0, .idxmsk[0] = 0 } #define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->idxmsk[0]; (e)++) - + for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -114,8 +126,9 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - int (*get_event_idx)(struct cpu_hw_events *cpuc, - struct hw_perf_event *hwc); + void (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); + const struct event_constraint *event_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -124,7 +137,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static const struct event_constraint *event_constraints; +static int x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx); /* * Not sure about some of these @@ -171,14 +185,14 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } -static const struct event_constraint intel_p6_event_constraints[] = +static struct event_constraint intel_p6_event_constraints[] = { - EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ + 
EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ EVENT_CONSTRAINT_END }; @@ -196,32 +210,43 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static const struct event_constraint intel_core_event_constraints[] = +static struct event_constraint intel_core_event_constraints[] = { - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ - EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ - EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */ EVENT_CONSTRAINT_END }; -static const struct event_constraint intel_nehalem_event_constraints[] = +static struct event_constraint intel_nehalem_event_constraints[] = { - EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ - EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ - EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ - EVENT_CONSTRAINT(0x43, 0x3), 
/* L1D_ALL_REF */ - EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ - EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_gen_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ EVENT_CONSTRAINT_END }; @@ -527,11 +552,11 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_REG_MASK) + (INTEL_ARCH_EVTSEL_MASK | \ + INTEL_ARCH_UNIT_MASK | \ + INTEL_ARCH_EDGE_MASK | \ + INTEL_ARCH_INV_MASK | \ + 
INTEL_ARCH_CNT_MASK) return hw_event & CORE_EVNTSEL_MASK; } @@ -1120,9 +1145,15 @@ static void amd_pmu_disable_all(void) void hw_perf_disable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!x86_pmu_initialized()) return; - return x86_pmu.disable_all(); + + if (cpuc->enabled) + cpuc->n_added = 0; + + x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) @@ -1189,10 +1220,237 @@ static void amd_pmu_enable_all(void) } } +static const struct pmu pmu; + +static inline int is_x86_event(struct perf_event *event) +{ + return event->pmu == &pmu; +} + +static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + int i, j , w, num; + int weight, wmax; + unsigned long *c; + u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct hw_perf_event *hwc; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + + for (i = 0; i < n; i++) { + x86_pmu.get_event_constraints(cpuc, + cpuc->event_list[i], + constraints[i]); + } + + /* + * weight = number of possible counters + * + * 1 = most constrained, only works on one counter + * wmax = least constrained, works on any counter + * + * assign events to counters starting with most + * constrained events. + */ + wmax = x86_pmu.num_events; + + /* + * when fixed event counters are present, + * wmax is incremented by 1 to account + * for one more choice + */ + if (x86_pmu.num_events_fixed) + wmax++; + + num = n; + for (w = 1; num && w <= wmax; w++) { + /* for each event */ + for (i = 0; i < n; i++) { + c = (unsigned long *)constraints[i]; + hwc = &cpuc->event_list[i]->hw; + + weight = bitmap_weight(c, X86_PMC_IDX_MAX); + if (weight != w) + continue; + + /* + * try to reuse previous assignment + * + * This is possible despite the fact that + * events or events order may have changed. + * + * What matters is the level of constraints + * of an event and this is constant for now. 
+ * + * This is possible also because we always + * scan from most to least constrained. Thus, + * if a counter can be reused, it means no + * more constrained events needed it. And + * next events will either compete for it + * (which cannot be solved anyway) or they + * have fewer constraints, and they can use + * another counter. + */ + j = hwc->idx; + if (j != -1 && !test_bit(j, used_mask)) + goto skip; + + for_each_bit(j, c, X86_PMC_IDX_MAX) { + if (!test_bit(j, used_mask)) + break; + } + + if (j == X86_PMC_IDX_MAX) + break; +skip: + set_bit(j, used_mask); + +#if 0 + pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", + smp_processor_id(), + hwc->config, + j, + assign ? 'y' : 'n'); +#endif + + if (assign) + assign[i] = j; + num--; + } + } + /* + * scheduling failed or is just a simulation, + * free resources if necessary + */ + if (!assign || num) { + for (i = 0; i < n; i++) { + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); + } + } + return num ? 
-ENOSPC : 0; +} + +/* + * dogrp: true if must collect siblings events (group) + * returns total number of events and error code + */ +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) +{ + struct perf_event *event; + int n, max_count; + + max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; + + /* current number of events already accepted */ + n = cpuc->n_events; + + if (is_x86_event(leader)) { + if (n >= max_count) + return -ENOSPC; + cpuc->event_list[n] = leader; + n++; + } + if (!dogrp) + return n; + + list_for_each_entry(event, &leader->sibling_list, group_entry) { + if (!is_x86_event(event) || + event->state == PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -ENOSPC; + + cpuc->event_list[n] = event; + n++; + } + return n; +} + + +static inline void x86_assign_hw_event(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + hwc->idx = idx; + + if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { + hwc->config_base = 0; + hwc->event_base = 0; + } else if (hwc->idx >= X86_PMC_IDX_FIXED) { + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + } else { + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } +} + void hw_perf_enable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + struct hw_perf_event *hwc; + int i; + if (!x86_pmu_initialized()) return; + if (cpuc->n_added) { + /* + * apply assignment obtained either from + * hw_perf_group_sched_in() or x86_pmu_enable() + * + * step1: save events moving to new counters + * step2: reprogram moved events into new counters + */ + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + continue; + + x86_pmu.disable(hwc, hwc->idx); + + clear_bit(hwc->idx, cpuc->active_mask); + barrier(); + cpuc->events[hwc->idx] = NULL; + + x86_perf_event_update(event, hwc, hwc->idx); + + hwc->idx = -1; + } + + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1) { + x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_perf_event_set_period(event, hwc, hwc->idx); + } + /* + * need to mark as active because x86_pmu_disable() + * clears active_mask and events[] yet it preserves + * idx + */ + set_bit(hwc->idx, cpuc->active_mask); + cpuc->events[hwc->idx] = event; + + x86_pmu.enable(hwc, hwc->idx); + perf_event_update_userpage(event); + } + cpuc->n_added = 0; + perf_events_lapic_init(); + } x86_pmu.enable_all(); } @@ -1391,148 +1649,43 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int fixed_mode_idx(struct hw_perf_event *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_events_fixed) - return -1; 
- - /* - * fixed counters do not take all possible filters - */ - if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - /* - * generic counter allocator: get next free counter - */ -static int -gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); - return idx == x86_pmu.num_events ? -1 : idx; -} - -/* - * intel-specific counter allocator: check event constraints - */ -static int -intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - const struct event_constraint *event_constraint; - int i, code; - - if (!event_constraints) - goto skip; - - code = hwc->config & CORE_EVNTSEL_EVENT_MASK; - - for_each_event_constraint(event_constraint, event_constraints) { - if (code == event_constraint->code) { - for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { - if (!test_and_set_bit(i, cpuc->used_mask)) - return i; - } - return -1; - } - } -skip: - return gen_get_event_idx(cpuc, hwc); -} - -static int -x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = fixed_mode_idx(hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. 
*/ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->event_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed event, if that is already taken - * then try to get a generic event: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic event again */ - if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = x86_pmu.get_event_idx(cpuc, hwc); - if (idx == -1) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; - } - - return idx; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in event: + * activate a single event + * + * The event is added to the group of enabled events + * but only if it can be scehduled with existing events. + * + * Called with PMU disabled. 
If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() */ static int x86_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; + struct hw_perf_event *hwc; + int assign[X86_PMC_IDX_MAX]; + int n, n0, ret; - idx = x86_schedule_event(cpuc, hwc); - if (idx < 0) - return idx; + hwc = &event->hw; - perf_events_lapic_init(); + n0 = cpuc->n_events; + n = collect_events(cpuc, event, false); + if (n < 0) + return n; - x86_pmu.disable(hwc, idx); + ret = x86_schedule_events(cpuc, n, assign); + if (ret) + return ret; + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); + cpuc->n_events = n; + cpuc->n_added = n - n0; - x86_perf_event_set_period(event, hwc, idx); - x86_pmu.enable(hwc, idx); - - perf_event_update_userpage(event); + if (hwc->idx != -1) + x86_perf_event_set_period(event, hwc, hwc->idx); return 0; } @@ -1576,7 +1729,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); @@ -1664,7 +1817,7 @@ static void x86_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; + int i, idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler @@ -1690,8 +1843,19 @@ static void x86_pmu_disable(struct perf_event *event) intel_pmu_drain_bts_buffer(cpuc); cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + for (i = 0; i < cpuc->n_events; i++) 
{ + if (event == cpuc->event_list[i]) { + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + + --cpuc->n_events; + } + } perf_event_update_userpage(event); } @@ -1962,6 +2126,176 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static struct event_constraint bts_constraint = { + .code = 0, + .cmask = 0, + .idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS +}; + +static int intel_special_constraints(struct perf_event *event, + u64 *idxmsk) +{ + unsigned int hw_event; + + hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (event->hw.sample_period == 1))) { + + bitmap_copy((unsigned long *)idxmsk, + (unsigned long *)bts_constraint.idxmsk, + X86_PMC_IDX_MAX); + return 1; + } + return 0; +} + +static void intel_get_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event, + u64 *idxmsk) +{ + const struct event_constraint *c; + + /* + * cleanup bitmask + */ + bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX); + + if (intel_special_constraints(event, idxmsk)) + return; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) { + + bitmap_copy((unsigned long *)idxmsk, + (unsigned long *)c->idxmsk, + X86_PMC_IDX_MAX); + return; + } + } + } + /* no constraints, means supports all generic counters */ + bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); +} + +static void amd_get_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event, + u64 *idxmsk) +{ +} + +static int x86_event_sched_in(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + int ret = 0; + + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + + if 
(!is_x86_event(event)) + ret = event->pmu->enable(event); + + if (!ret && !is_software_event(event)) + cpuctx->active_oncpu++; + + if (!ret && event->attr.exclusive) + cpuctx->exclusive = 1; + + return ret; +} + +static void x86_event_sched_out(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; + + if (!is_x86_event(event)) + event->pmu->disable(event); + + event->tstamp_running -= event->ctx->time - event->tstamp_stopped; + + if (!is_software_event(event)) + cpuctx->active_oncpu--; + + if (event->attr.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + * + * called with PMU disabled. If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() + */ +int hw_perf_group_sched_in(struct perf_event *leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct perf_event *sub; + int assign[X86_PMC_IDX_MAX]; + int n0, n1, ret; + + /* n0 = total number of events */ + n0 = collect_events(cpuc, leader, true); + if (n0 < 0) + return n0; + + ret = x86_schedule_events(cpuc, n0, assign); + if (ret) + return ret; + + ret = x86_event_sched_in(leader, cpuctx, cpu); + if (ret) + return ret; + + n1 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state != PERF_EVENT_STATE_OFF) { + ret = x86_event_sched_in(sub, cpuctx, cpu); + if (ret) + goto undo; + ++n1; + } + } + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n0*sizeof(int)); + + cpuc->n_events = n0; + cpuc->n_added = n1; + ctx->nr_active += n1; + + /* + * 1 means 
successful and events are active + * This is not quite true because we defer + * actual activation until hw_perf_enable() but + * this way we* ensure caller won't try to enable + * individual events + */ + return 1; +undo: + x86_event_sched_out(leader, cpuctx, cpu); + n0 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + x86_event_sched_out(sub, cpuctx, cpu); + if (++n0 == n1) + break; + } + } + return ret; +} + static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, @@ -1993,7 +2327,8 @@ static __initconst struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, - .get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints, + .event_constraints = intel_p6_event_constraints }; static __initconst struct x86_pmu intel_pmu = { @@ -2017,7 +2352,7 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, - .get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints }; static __initconst struct x86_pmu amd_pmu = { @@ -2038,7 +2373,7 @@ static __initconst struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, - .get_event_idx = gen_get_event_idx, + .get_event_constraints = amd_get_event_constraints }; static __init int p6_pmu_init(void) @@ -2051,12 +2386,9 @@ static __init int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ - event_constraints = intel_p6_event_constraints; - break; case 9: case 13: /* Pentium M */ - event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2121,23 +2453,29 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + 
x86_pmu.event_constraints = intel_core_event_constraints; pr_cont("Core2 events, "); - event_constraints = intel_core_event_constraints; break; - default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - event_constraints = intel_nehalem_event_constraints; + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; + default: + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); } return 0; } @@ -2234,36 +2572,43 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; -static int -validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event fake_event = event->hw; - - if (event->pmu && event->pmu != &pmu) - return 0; - - return x86_schedule_event(cpuc, &fake_event) >= 0; -} - +/* + * validate a single event group + * + * validation include: + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters + * + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. 
+ */ static int validate_group(struct perf_event *event) { - struct perf_event *sibling, *leader = event->group_leader; - struct cpu_hw_events fake_pmu; + struct perf_event *leader = event->group_leader; + struct cpu_hw_events fake_cpuc; + int n; - memset(&fake_pmu, 0, sizeof(fake_pmu)); + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); - if (!validate_event(&fake_pmu, leader)) + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + n = collect_events(&fake_cpuc, leader, true); + if (n < 0) return -ENOSPC; - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) - return -ENOSPC; - } - - if (!validate_event(&fake_pmu, event)) + fake_cpuc.n_events = n; + n = collect_events(&fake_cpuc, event, false); + if (n < 0) return -ENOSPC; - return 0; + fake_cpuc.n_events = n; + + return x86_schedule_events(&fake_cpuc, n, NULL); } const struct pmu *hw_perf_event_init(struct perf_event *event) From 8113070d6639d2245c6c79afb8df42cedab30540 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 21 Jan 2010 17:39:01 +0200 Subject: [PATCH 130/640] perf_events: Add fast-path to the rescheduling code Implement correct fastpath scheduling, i.e., reuse previous assignment. 
Signed-off-by: Stephane Eranian [ split from larger patch] Signed-off-by: Peter Zijlstra LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 91 +++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 995ac4ae379c..0bd23d01af34 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1244,6 +1244,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) constraints[i]); } + /* + * fastpath, try to reuse previous register + */ + for (i = 0, num = n; i < n; i++, num--) { + hwc = &cpuc->event_list[i]->hw; + c = (unsigned long *)constraints[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c)) + break; + + /* not already used */ + if (test_bit(hwc->idx, used_mask)) + break; + +#if 0 + pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", + smp_processor_id(), + hwc->config, + hwc->idx, + assign ? 
'y' : 'n'); +#endif + + set_bit(hwc->idx, used_mask); + if (assign) + assign[i] = hwc->idx; + } + if (!num) + goto done; + + /* + * begin slow path + */ + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + /* * weight = number of possible counters * @@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (x86_pmu.num_events_fixed) wmax++; - num = n; - for (w = 1; num && w <= wmax; w++) { + for (w = 1, num = n; num && w <= wmax; w++) { /* for each event */ - for (i = 0; i < n; i++) { + for (i = 0; num && i < n; i++) { c = (unsigned long *)constraints[i]; hwc = &cpuc->event_list[i]->hw; @@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (weight != w) continue; - /* - * try to reuse previous assignment - * - * This is possible despite the fact that - * events or events order may have changed. - * - * What matters is the level of constraints - * of an event and this is constant for now. - * - * This is possible also because we always - * scan from most to least constrained. Thus, - * if a counter can be reused, it means no, - * more constrained events, needed it. And - * next events will either compete for it - * (which cannot be solved anyway) or they - * have fewer constraints, and they can use - * another counter. - */ - j = hwc->idx; - if (j != -1 && !test_bit(j, used_mask)) - goto skip; - for_each_bit(j, c, X86_PMC_IDX_MAX) { if (!test_bit(j, used_mask)) break; @@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; -skip: - set_bit(j, used_mask); #if 0 - pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", + pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", smp_processor_id(), hwc->config, j, assign ? 
'y' : 'n'); #endif + set_bit(j, used_mask); + if (assign) assign[i] = j; num--; } } +done: /* * scheduling failed or is just a simulation, * free resources if necessary @@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, list_for_each_entry(event, &leader->sibling_list, group_entry) { if (!is_x86_event(event) || - event->state == PERF_EVENT_STATE_OFF) + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) @@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk) { + /* no constraints, means supports all generic counters */ + bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); } static int x86_event_sched_in(struct perf_event *event, @@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, n1 = 1; list_for_each_entry(sub, &leader->sibling_list, group_entry) { - if (sub->state != PERF_EVENT_STATE_OFF) { + if (sub->state > PERF_EVENT_STATE_OFF) { ret = x86_event_sched_in(sub, cpuctx, cpu); if (ret) goto undo; @@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event) const struct pmu *hw_perf_event_init(struct perf_event *event) { + const struct pmu *tmp; int err; err = __hw_perf_event_init(event); if (!err) { + /* + * we temporarily connect event to its pmu + * such that validate_group() can classify + * it as an x86 event using is_x86_event() + */ + tmp = event->pmu; + event->pmu = &pmu; + if (event->group_leader != event) err = validate_group(event); + + event->pmu = tmp; } if (err) { if (event->destroy) From 502568d563bcc37ac505a83341c0c95b88c015a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 14:35:46 +0100 Subject: [PATCH 131/640] perf_event: x86: Allocate the fake_cpuc GCC was complaining the stack usage was too large, so allocate the structure. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.411197266@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0bd23d01af34..7bd359a57839 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2606,10 +2606,13 @@ static const struct pmu pmu = { static int validate_group(struct perf_event *event) { struct perf_event *leader = event->group_leader; - struct cpu_hw_events fake_cpuc; - int n; + struct cpu_hw_events *fake_cpuc; + int ret, n; - memset(&fake_cpuc, 0, sizeof(fake_cpuc)); + ret = -ENOMEM; + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + goto out; /* * the event is not yet connected with its @@ -2617,18 +2620,24 @@ static int validate_group(struct perf_event *event) * existing siblings, then add the new event * before we can simulate the scheduling */ - n = collect_events(&fake_cpuc, leader, true); + ret = -ENOSPC; + n = collect_events(fake_cpuc, leader, true); if (n < 0) - return -ENOSPC; + goto out_free; - fake_cpuc.n_events = n; - n = collect_events(&fake_cpuc, event, false); + fake_cpuc->n_events = n; + n = collect_events(fake_cpuc, event, false); if (n < 0) - return -ENOSPC; + goto out_free; - fake_cpuc.n_events = n; + fake_cpuc->n_events = n; - return x86_schedule_events(&fake_cpuc, n, NULL); + ret = x86_schedule_events(fake_cpuc, n, NULL); + +out_free: + kfree(fake_cpuc); +out: + return ret; } const struct pmu *hw_perf_event_init(struct perf_event *event) From 81269a085669b5130058a0275aa7ba9f94abd1fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 14:55:22 +0100 Subject: [PATCH 132/640] perf_event: x86: Fixup constraints typing issue Constraints get defined as u64 but sized in long quantities and then cast to long.
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.504916780@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7bd359a57839..7e181a5097ea 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1232,7 +1232,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) int i, j , w, num; int weight, wmax; unsigned long *c; - u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; @@ -1249,7 +1249,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0, num = n; i < n; i++, num--) { hwc = &cpuc->event_list[i]->hw; - c = (unsigned long *)constraints[i]; + c = constraints[i]; /* never assigned */ if (hwc->idx == -1) @@ -1306,7 +1306,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) for (w = 1, num = n; num && w <= wmax; w++) { /* for each event */ for (i = 0; num && i < n; i++) { - c = (unsigned long *)constraints[i]; + c = constraints[i]; hwc = &cpuc->event_list[i]->hw; weight = bitmap_weight(c, X86_PMC_IDX_MAX); From c91e0f5da81c6f3a611a1bd6d0cca6717c90fdab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:25:59 +0100 Subject: [PATCH 133/640] perf_event: x86: Clean up some of the u64/long bitmask casting We need this to be u64 for direct assignment, but the bitmask functions all work on unsigned long, leading to cast heaven; solve this by using a union.
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.595961269@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 47 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7e181a5097ea..921bbf732e77 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -69,10 +69,11 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) - struct event_constraint { - u64 idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)]; + union { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 idxmsk64[1]; + }; int code; int cmask; }; @@ -90,13 +91,14 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ - .code = (c), \ - .cmask = (m), \ - .idxmsk[0] = (n) } +#define EVENT_CONSTRAINT(c, n, m) { \ + { .idxmsk64[0] = (n) }, \ + .code = (c), \ + .cmask = (m), \ +} #define EVENT_CONSTRAINT_END \ - { .code = 0, .cmask = 0, .idxmsk[0] = 0 } + EVENT_CONSTRAINT(0, 0, 0) #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->cmask; (e)++) @@ -126,8 +128,11 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - void (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk); - void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); + void (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event, + unsigned long *idxmsk); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); const struct event_constraint *event_constraints; }; @@ -2144,14 +2149,11 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } -static struct event_constraint bts_constraint = { - 
.code = 0, - .cmask = 0, - .idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS -}; +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); static int intel_special_constraints(struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { unsigned int hw_event; @@ -2171,14 +2173,14 @@ static int intel_special_constraints(struct perf_event *event, static void intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { const struct event_constraint *c; /* * cleanup bitmask */ - bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX); + bitmap_zero(idxmsk, X86_PMC_IDX_MAX); if (intel_special_constraints(event, idxmsk)) return; @@ -2186,10 +2188,7 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc, if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if ((event->hw.config & c->cmask) == c->code) { - - bitmap_copy((unsigned long *)idxmsk, - (unsigned long *)c->idxmsk, - X86_PMC_IDX_MAX); + bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX); return; } } @@ -2200,10 +2199,10 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc, static void amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { /* no constraints, means supports all generic counters */ - bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); + bitmap_fill(idxmsk, x86_pmu.num_events); } static int x86_event_sched_in(struct perf_event *event, From 8433be1184e4f22c37d4b8ed36cde529a47882f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:38:26 +0100 Subject: [PATCH 134/640] perf_event: x86: Reduce some overly long lines with some MACROs Introduce INTEL_EVENT_CONSTRAINT and FIXED_EVENT_CONSTRAINT to reduce some line length and typing work. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.688730371@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 68 +++++++++++++++++--------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 921bbf732e77..4d1ed101c10d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -97,6 +97,12 @@ struct cpu_hw_events { .cmask = (m), \ } +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -192,12 +198,12 @@ static u64 p6_pmu_raw_event(u64 hw_event) static struct event_constraint intel_p6_event_constraints[] = { - EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK), /* FLOPS */ - EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ EVENT_CONSTRAINT_END }; @@ -217,41 +223,41 @@ static const u64 intel_perfmon_event_map[] = static struct event_constraint intel_core_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ - EVENT_CONSTRAINT(0x10, 0x1, 
INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */ - EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */ - EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */ - EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_nehalem_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ - EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */ - EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */ - EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */ - EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */ - EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */ - EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* 
LOAD_HIT_PRE */ - EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */ - EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_gen_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ EVENT_CONSTRAINT_END }; From 9f41699ed067fa695faff8e2e9981b2550abec62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:59:29 +0100 Subject: [PATCH 135/640] bitops: Provide compile time HWEIGHT{8,16,32,64} Provide compile time versions of hweight. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner LKML-Reference: <20100122155535.797688466@chello.nl> [ Remove some whitespace damage while we are at it ] Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c05a29cb9bb2..ba0fd1eb4af7 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -25,7 +25,7 @@ static __inline__ int get_bitmask_order(unsigned int count) { int order; - + order = fls(count); return order; /* We could be slightly more clever with -1 here... */ } @@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count) static __inline__ int get_count_order(unsigned int count) { int order; - + order = fls(count) - 1; if (count & (count - 1)) order++; @@ -45,6 +45,20 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } +#define HWEIGHT8(w) \ + ( (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))) ) + +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8)) +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16)) +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32)) + /** * rol32 - rotate a 32-bit value left * @word: value to rotate From 63b146490befc027a7e0923e333269e68b20d380 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:32:17 +0100 Subject: [PATCH 136/640] perf_event: x86: Optimize the constraint searching bits Instead of copying bitmasks around, pass pointers to the constraint structure. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.887853503@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 75 +++++++++++++++----------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4d1ed101c10d..092ad566734c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -134,12 +134,14 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - void (*get_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk); + + struct event_constraint * + (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - const struct event_constraint *event_constraints; + struct event_constraint *event_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -1242,17 +1244,15 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { int i, j , w, num; int weight, wmax; - unsigned long *c; - unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); for (i = 0; i < n; i++) { - x86_pmu.get_event_constraints(cpuc, - cpuc->event_list[i], - constraints[i]); + constraints[i] = + x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); } /* @@ -1267,7 +1267,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) break; /* constraint still honored */ - if (!test_bit(hwc->idx, c)) + if (!test_bit(hwc->idx, c->idxmsk)) break; /* not already used */ @@ -1320,11 +1320,11 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) c = 
constraints[i]; hwc = &cpuc->event_list[i]->hw; - weight = bitmap_weight(c, X86_PMC_IDX_MAX); + weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX); if (weight != w) continue; - for_each_bit(j, c, X86_PMC_IDX_MAX) { + for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { if (!test_bit(j, used_mask)) break; } @@ -2155,11 +2155,13 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static struct event_constraint unconstrained; + static struct event_constraint bts_constraint = EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); -static int intel_special_constraints(struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +intel_special_constraints(struct perf_event *event) { unsigned int hw_event; @@ -2169,46 +2171,34 @@ static int intel_special_constraints(struct perf_event *event, x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && (event->hw.sample_period == 1))) { - bitmap_copy((unsigned long *)idxmsk, - (unsigned long *)bts_constraint.idxmsk, - X86_PMC_IDX_MAX); - return 1; + return &bts_constraint; } - return 0; + return NULL; } -static void intel_get_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - const struct event_constraint *c; + struct event_constraint *c; - /* - * cleanup bitmask - */ - bitmap_zero(idxmsk, X86_PMC_IDX_MAX); - - if (intel_special_constraints(event, idxmsk)) - return; + c = intel_special_constraints(event); + if (c) + return c; if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) { - bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX); - return; - } + if ((event->hw.config & c->cmask) == c->code) + return c; } } - /* no constraints, means supports all generic counters */ - bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); + + return 
&unconstrained; } -static void amd_get_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - /* no constraints, means supports all generic counters */ - bitmap_fill(idxmsk, x86_pmu.num_events); + return &unconstrained; } static int x86_event_sched_in(struct perf_event *event, @@ -2576,6 +2566,9 @@ void __init init_hw_perf_events(void) perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); + unconstrained = (struct event_constraint) + EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); pr_info("... generic registers: %d\n", x86_pmu.num_events); From 272d30be622c9c6cbd514b1211ff359292001baa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:32:17 +0100 Subject: [PATCH 137/640] perf_event: x86: Optimize constraint weight computation Add a weight member to the constraint structure and avoid recomputing the weight at runtime. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.963944926@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 092ad566734c..2c22ce4fa784 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,7 @@ struct event_constraint { }; int code; int cmask; + int weight; }; struct cpu_hw_events { @@ -95,6 +97,7 @@ struct cpu_hw_events { { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ + .weight = HWEIGHT64((u64)(n)), \ } #define INTEL_EVENT_CONSTRAINT(c, n) \ @@ -1242,8 +1245,7 @@ static inline int is_x86_event(struct perf_event *event) static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - int i, j , w, num; - int weight, wmax; + int i, j, w, num, wmax; struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; @@ -1320,8 +1322,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) c = constraints[i]; hwc = &cpuc->event_list[i]->hw; - weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX); - if (weight != w) + if (c->weight != w) continue; for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { From c933c1a603d5bf700ddce79216c1be0ec3bc0e6c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:40:12 +0100 Subject: [PATCH 138/640] perf_event: x86: Optimize the fast path a little more Remove num from the fast path and save a few ops. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155536.056430539@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2c22ce4fa784..33c889ff21ae 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1245,9 +1245,9 @@ static inline int is_x86_event(struct perf_event *event) static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - int i, j, w, num, wmax; struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int i, j, w, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); @@ -1260,7 +1260,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* * fastpath, try to reuse previous register */ - for (i = 0, num = n; i < n; i++, num--) { + for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; c = constraints[i]; @@ -1288,7 +1288,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (assign) assign[i] = hwc->idx; } - if (!num) + if (i == n) goto done; /* From 6c9687abeb24d5b7aae7db5be070c2139ad29e29 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jan 2010 11:57:25 +0100 Subject: [PATCH 139/640] perf_event: x86: Optimize x86_pmu_disable() x86_pmu_disable() removes the event from the cpuc->event_list[], however since an event can only be on that list once, stop looking after we found it. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 33c889ff21ae..66de282ad2fb 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1884,6 +1884,7 @@ static void x86_pmu_disable(struct perf_event *event) cpuc->event_list[i-1] = cpuc->event_list[i]; --cpuc->n_events; + break; } } perf_event_update_userpage(event); From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Jan 2010 08:39:39 +0100 Subject: [PATCH 140/640] perf, x86: Clean up event constraints code a bit - Remove stray debug code - Improve ugly macros a bit - Remove some whitespace damage - (Also fix up some accumulated damage in perf_event.h) Signed-off-by: Ingo Molnar Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/cpu/perf_event.c | 37 +++++++------------------------- include/linux/perf_event.h | 24 ++++++++++----------- 2 files changed, 19 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66de282ad2fb..fdbe24842271 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,24 +93,19 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define EVENT_CONSTRAINT(c, n, m) { \ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = HWEIGHT64((u64)(n)), \ } -#define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define INTEL_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define FIXED_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) +#define 
EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0) -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) - -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->cmask; (e)++) +#define for_each_event_constraint(e, c) for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -1276,14 +1271,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (test_bit(hwc->idx, used_mask)) break; -#if 0 - pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - hwc->idx, - assign ? 'y' : 'n'); -#endif - set_bit(hwc->idx, used_mask); if (assign) assign[i] = hwc->idx; @@ -1333,14 +1320,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; -#if 0 - pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - j, - assign ? 'y' : 'n'); -#endif - set_bit(j, used_mask); if (assign) @@ -2596,9 +2575,9 @@ static const struct pmu pmu = { * validate a single event group * * validation include: - * - check events are compatible which each other - * - events do not compete for the same counter - * - number of events <= number of counters + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters * * validation ensures the group can be loaded onto the * PMU if it was the only group available. 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 72b2615600d8..953c17731e0d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -290,7 +290,7 @@ struct perf_event_mmap_page { }; #define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) -#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) #define PERF_RECORD_MISC_USER (2 << 0) #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) @@ -356,8 +356,8 @@ enum perf_event_type { * u64 stream_id; * }; */ - PERF_RECORD_THROTTLE = 5, - PERF_RECORD_UNTHROTTLE = 6, + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, /* * struct { @@ -371,10 +371,10 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, tid; + * struct perf_event_header header; + * u32 pid, tid; * - * struct read_format values; + * struct read_format values; * }; */ PERF_RECORD_READ = 8, @@ -412,7 +412,7 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * }; */ - PERF_RECORD_SAMPLE = 9, + PERF_RECORD_SAMPLE = 9, PERF_RECORD_MAX, /* non-ABI */ }; @@ -752,8 +752,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next); +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); @@ -853,8 +852,7 @@ extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); -extern void perf_tp_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int 
entry_size); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags @@ -895,13 +893,13 @@ static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } static inline void -perf_bp_event(struct perf_event *event, void *data) { } +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } From 2e8418736dff9c6fdadb2f87dcc2087cebf32167 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jan 2010 15:58:43 +0100 Subject: [PATCH 141/640] perf_event: x86: Deduplicate the disable code Share the meat of the x86_pmu_disable() code with hw_perf_enable(). Also remove the barrier() from that code, since I could not convince myself we actually need it. 
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdbe24842271..07fa0c2faa09 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1401,6 +1401,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, } } +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); + void hw_perf_enable(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1426,13 +1428,7 @@ void hw_perf_enable(void) if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) continue; - x86_pmu.disable(hwc, hwc->idx); - - clear_bit(hwc->idx, cpuc->active_mask); - barrier(); - cpuc->events[hwc->idx] = NULL; - - x86_perf_event_update(event, hwc, hwc->idx); + __x86_pmu_disable(event, cpuc); hwc->idx = -1; } @@ -1822,11 +1818,10 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) event->pending_kill = POLL_IN; } -static void x86_pmu_disable(struct perf_event *event) +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int i, idx = hwc->idx; + int idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler @@ -1835,12 +1830,6 @@ static void x86_pmu_disable(struct perf_event *event) clear_bit(idx, cpuc->active_mask); x86_pmu.disable(hwc, idx); - /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the event: - */ - barrier(); - /* * Drain the remaining delta count out of a event * that we are disabling: @@ -1852,6 +1841,14 @@ static void x86_pmu_disable(struct perf_event *event) intel_pmu_drain_bts_buffer(cpuc); cpuc->events[idx] = NULL; +} + +static void 
x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + __x86_pmu_disable(event, cpuc); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { From ed8777fc132e589d48a0ba854fdbb5d8203b58e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:46 +0100 Subject: [PATCH 142/640] perf_events, x86: Fix event constraint masks Since constraints are specified on the event number, not number and unit mask shorten the constraint masks so that we'll actually match something. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221121.967610372@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/kernel/cpu/perf_event.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index dbc082685d52..ff5ede128bae 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -49,7 +49,7 @@ INTEL_ARCH_INV_MASK| \ INTEL_ARCH_EDGE_MASK|\ INTEL_ARCH_UNIT_MASK|\ - INTEL_ARCH_EVENT_MASK) + INTEL_ARCH_EVTSEL_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 07fa0c2faa09..951213a51489 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -100,12 +100,17 @@ struct cpu_hw_events { .weight = HWEIGHT64((u64)(n)), \ } -#define INTEL_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) -#define FIXED_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) -#define EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0) +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define 
for_each_event_constraint(e, c) for ((e) = (c); (e)->cmask; (e)++) +#define EVENT_CONSTRAINT_END \ + EVENT_CONSTRAINT(0, 0, 0) + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu From 1a6e21f791fe85b40a9ddbafe999ab8ccffc3f78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:47 +0100 Subject: [PATCH 143/640] perf_events, x86: Clean up hw_perf_*_all() implementation Put the recursion avoidance code in the generic hook instead of replicating it in each implementation. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221122.057507285@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 59 ++++++++------------------------ 1 file changed, 14 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 951213a51489..cf10839f20ea 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1099,15 +1099,8 @@ static int __hw_perf_event_init(struct perf_event *event) static void p6_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1118,12 +1111,6 @@ static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) @@ -1135,17 +1122,6 @@ static void amd_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * events proper, so that amd_pmu_enable_event() does the - * right 
thing. - */ - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { u64 val; @@ -1166,23 +1142,20 @@ void hw_perf_disable(void) if (!x86_pmu_initialized()) return; - if (cpuc->enabled) - cpuc->n_added = 0; + if (!cpuc->enabled) + return; + + cpuc->n_added = 0; + cpuc->enabled = 0; + barrier(); x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long val; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1193,12 +1166,6 @@ static void intel_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -1217,12 +1184,6 @@ static void amd_pmu_enable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { struct perf_event *event = cpuc->events[idx]; u64 val; @@ -1417,6 +1378,10 @@ void hw_perf_enable(void) if (!x86_pmu_initialized()) return; + + if (cpuc->enabled) + return; + if (cpuc->n_added) { /* * apply assignment obtained either from @@ -1461,6 +1426,10 @@ void hw_perf_enable(void) cpuc->n_added = 0; perf_events_lapic_init(); } + + cpuc->enabled = 1; + barrier(); + x86_pmu.enable_all(); } From 452a339a976e7f782c786eb3f73080401e2fa3a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:48 +0100 Subject: [PATCH 144/640] perf_events, x86: Implement Intel Westmere support The new Intel documentation includes Westmere arch specific event maps that are significantly different from the Nehalem ones. Add support for this generation. Found the CPUID model numbers on wikipedia. 
Also amend some Nehalem constraints, spotted those when looking for the differences between Nehalem and Westmere. Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven Cc: "H. Peter Anvin" Cc: Stephane Eranian LKML-Reference: <20100127221122.151865645@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 124 +++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index cf10839f20ea..3fac0bfc2dee 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -244,18 +244,26 @@ static struct event_constraint intel_core_event_constraints[] = static struct event_constraint intel_nehalem_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ - INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /*
UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ EVENT_CONSTRAINT_END }; @@ -286,6 +294,97 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static __initconst u64 westmere_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ 
+ }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -2423,7 +2522,9 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_core_event_constraints; pr_cont("Core2 events, "); break; - case 26: + + case 26: /* 45 nm nehalem, "Bloomfield" */ + case 30: /* 45 nm nehalem, "Lynnfield" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2437,6 +2538,15 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; + + case 37: /* 32 nm nehalem, "Clarkdale" */ + case 44: /* 32 nm nehalem, "Gulftown" */ + memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_westmere_event_constraints; + pr_cont("Westmere events, "); + break; default: /* * default constraints for v2 and up From 18c01f8abff51e4910cc5ffb4b710e8c6eea60c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:49 +0100 Subject: [PATCH 145/640] perf_events, x86: Remove spurious counter reset from x86_pmu_enable() At enable time the counter might still have a ->idx pointing to a 
previously occupied location that might now be taken by another event. Resetting the counter at that location with data from this event will destroy the other counter's count. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221122.261477183@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3fac0bfc2dee..518eb3e39577 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1762,9 +1762,6 @@ static int x86_pmu_enable(struct perf_event *event) cpuc->n_events = n; cpuc->n_added = n - n0; - if (hwc->idx != -1) - x86_perf_event_set_period(event, hwc, hwc->idx); - return 0; } From 75c9f3284a7ff957829f44baace82406a6354ceb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 09:04:26 +0100 Subject: [PATCH 146/640] perf_events: Fix sample_period transfer on inherit One problem with frequency driven counters is that we cannot predict the rate at which they trigger, therefore we have to start them at period=1, this causes a ramp up effect. However, if we fail to propagate the stable state on fork each new child will have to ramp up again. This can lead to significant artifacts in sample data. 
Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1264752266.4283.2121.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 251fb9552492..53dc2a362111 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5002,8 +5002,15 @@ inherit_event(struct perf_event *parent_event, else child_event->state = PERF_EVENT_STATE_OFF; - if (parent_event->attr.freq) - child_event->hw.sample_period = parent_event->hw.sample_period; + if (parent_event->attr.freq) { + u64 sample_period = parent_event->hw.sample_period; + struct hw_perf_event *hwc = &child_event->hw; + + hwc->sample_period = sample_period; + hwc->last_period = sample_period; + + atomic64_set(&hwc->period_left, sample_period); + } child_event->overflow_handler = parent_event->overflow_handler; From 72b8fa1730207274f6818b47b891ce5dff79287e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Jan 2010 21:05:49 -0200 Subject: [PATCH 147/640] perf top: Exit if specified --vmlinux can't be used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we do lazy loading of symtabs we only will know if the specified vmlinux file is invalid when we actually have a hit in kernel space and then try to load it. So if we get kernel hits and there are _no_ symbols in the DSO backing the kernel map, bail out. 
Reported-by: Mike Galbraith Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264633557-17597-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2227b84aa002..78f9c4576a0c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -951,9 +951,31 @@ static void event__process_sample(const event_t *self, } if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || - al.sym == NULL || al.filtered) + al.filtered) return; + if (al.sym == NULL) { + /* + * As we do lazy loading of symtabs we only will know if the + * specified vmlinux file is invalid when we actually have a + * hit in kernel space and then try to load it. So if we get + * here and there are _no_ symbols in the DSO backing the + * kernel map, bail out. + * + * We may never get here, for instance, if we use -K/ + * --hide-kernel-symbols, even if the user specifies an + * invalid --vmlinux ;-) + */ + if (al.map == session->vmlinux_maps[MAP__FUNCTION] && + RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { + pr_err("The %s file can't be used\n", + symbol_conf.vmlinux_name); + exit(1); + } + + return; + } + syme = symbol__priv(al.sym); if (!syme->skip) { syme->count[counter]++; From a19afe46412452fef89cc623873a8931b3685944 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Jan 2010 21:05:50 -0200 Subject: [PATCH 148/640] perf symbols: Factor out dso__load_vmlinux_path() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we can call it directly from regression tests, and also to reduce the size of dso__load_kernel_sym(), making it more clear. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264633557-17597-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 38 ++++++++++++++++++++++++-------------- tools/perf/util/symbol.h | 2 ++ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f1f609dcf9a1..26ec603083e0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1578,6 +1578,27 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } +int dso__load_vmlinux_path(struct dso *self, struct map *map, + struct perf_session *session, symbol_filter_t filter) +{ + int i, err = 0; + + pr_debug("Looking at the vmlinux_path (%d entries long)\n", + vmlinux_path__nr_entries); + + for (i = 0; i < vmlinux_path__nr_entries; ++i) { + err = dso__load_vmlinux(self, map, session, vmlinux_path[i], + filter); + if (err > 0) { + pr_debug("Using %s for symbols\n", vmlinux_path[i]); + dso__set_long_name(self, strdup(vmlinux_path[i])); + break; + } + } + + return err; +} + static int dso__load_kernel_sym(struct dso *self, struct map *map, struct perf_session *session, symbol_filter_t filter) { @@ -1606,20 +1627,9 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, } if (vmlinux_path != NULL) { - int i; - pr_debug("Looking at the vmlinux_path (%d entries long)\n", - vmlinux_path__nr_entries); - for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(self, map, session, - vmlinux_path[i], filter); - if (err > 0) { - pr_debug("Using %s for symbols\n", - vmlinux_path[i]); - dso__set_long_name(self, - strdup(vmlinux_path[i])); - goto out_fixup; - } - } + err = dso__load_vmlinux_path(self, map, session, filter); + if (err > 0) + goto out_fixup; } /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ffe0b0f2e5d3..a94997aeb334 100644 --- 
a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -129,6 +129,8 @@ struct perf_session; int dso__load(struct dso *self, struct map *map, struct perf_session *session, symbol_filter_t filter); +int dso__load_vmlinux_path(struct dso *self, struct map *map, + struct perf_session *session, symbol_filter_t filter); void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); From fd1d908c543fbdfae82839d24b0872c542fceedc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Jan 2010 21:05:51 -0200 Subject: [PATCH 149/640] perf symbols: Split helpers used when creating kernel dso object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make it clear and allow for direct usage by, for instance, regression test suites. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264633557-17597-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 28 +++++++++++++++++++++------- tools/perf/util/symbol.h | 2 ++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 26ec603083e0..f9049d12ead6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1762,24 +1762,38 @@ size_t dsos__fprintf_buildid(FILE *fp, bool with_hits) __dsos__fprintf_buildid(&dsos__user, fp, with_hits)); } +struct dso *dso__new_kernel(const char *name) +{ + struct dso *self = dso__new(name ?: "[kernel.kallsyms]"); + + if (self != NULL) { + self->short_name = "[kernel]"; + self->kernel = 1; + } + + return self; +} + +void dso__read_running_kernel_build_id(struct dso *self) +{ + if (sysfs__read_build_id("/sys/kernel/notes", self->build_id, + sizeof(self->build_id)) == 0) + self->has_build_id = true; +} + static struct dso *dsos__create_kernel(const char *vmlinux) { - struct dso *kernel = dso__new(vmlinux ?: 
"[kernel.kallsyms]"); + struct dso *kernel = dso__new_kernel(vmlinux); if (kernel == NULL) return NULL; - kernel->short_name = "[kernel]"; - kernel->kernel = 1; - vdso = dso__new("[vdso]"); if (vdso == NULL) goto out_delete_kernel_dso; dso__set_loaded(vdso, MAP__FUNCTION); - if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, - sizeof(kernel->build_id)) == 0) - kernel->has_build_id = true; + dso__read_running_kernel_build_id(kernel); dsos__add(&dsos__kernel, kernel); dsos__add(&dsos__user, vdso); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a94997aeb334..124302778c09 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -109,6 +109,7 @@ struct dso { }; struct dso *dso__new(const char *name); +struct dso *dso__new_kernel(const char *name); void dso__delete(struct dso *self); bool dso__loaded(const struct dso *self, enum map_type type); @@ -139,6 +140,7 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *self); void dso__set_long_name(struct dso *self, char *name); void dso__set_build_id(struct dso *self, void *build_id); +void dso__read_running_kernel_build_id(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, const char *name); From 64abebf731df87e6f4ae7d9ffc340bdf0c033e44 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Jan 2010 21:05:52 -0200 Subject: [PATCH 150/640] perf session: Create kernel maps in the constructor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing one extra step needed in the tools that need this, fixing a bug in 'perf probe' where this was not being done. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1264633557-17597-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 5 ----- tools/perf/builtin-record.c | 5 ----- tools/perf/builtin-top.c | 5 ----- tools/perf/util/session.c | 13 +++++++++++-- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 7323d9dfbce8..38b8ca900eda 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -491,11 +491,6 @@ static int __cmd_kmem(void) if (!perf_session__has_traces(session, "kmem record")) goto out_delete; - if (perf_session__create_kernel_maps(session) < 0) { - pr_err("Problems creating kernel maps\n"); - return -1; - } - setup_pager(); err = perf_session__process_events(session, &event_ops); if (err != 0) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7bb9ca1b30fa..90345223908c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -477,11 +477,6 @@ static int __cmd_record(int argc, const char **argv) return -1; } - if (perf_session__create_kernel_maps(session) < 0) { - pr_err("Problems creating kernel maps\n"); - return -1; - } - if (!file_new) { err = perf_header__read(&session->header, output); if (err < 0) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 78f9c4576a0c..1fc018e048e1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1191,11 +1191,6 @@ static int __cmd_top(void) if (session == NULL) return -ENOMEM; - if (perf_session__create_kernel_maps(session) < 0) { - pr_err("Problems creating kernel maps\n"); - return -1; - } - if (target_pid != -1) event__synthesize_thread(target_pid, event__process, session); else diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1951e330377c..8e7c1896eaa2 100644 --- 
a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -70,8 +70,17 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc self->unknown_events = 0; map_groups__init(&self->kmaps); - if (mode == O_RDONLY && perf_session__open(self, force) < 0) - goto out_delete; + if (mode == O_RDONLY) { + if (perf_session__open(self, force) < 0) + goto out_delete; + } else if (mode == O_WRONLY) { + /* + * In O_RDONLY mode this will be performed when reading the + * kernel MMAP event, in event__process_mmap(). + */ + if (perf_session__create_kernel_maps(self) < 0) + goto out_delete; + } self->sample_type = perf_header__sample_type(&self->header); out: From 66ddfc62caec65a25fd5a8b20f535a2958ee94e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 28 Jan 2010 20:50:39 +0100 Subject: [PATCH 151/640] mx35: add a missing comma in a pad definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Tim Sander Signed-off-by: Uwe Kleine-König Signed-off-by: Sascha Hauer --- arch/arm/plat-mxc/include/mach/iomux-mx35.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx35.h b/arch/arm/plat-mxc/include/mach/iomux-mx35.h index 00b0ac1db225..c88d40795f7a 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-mx35.h +++ b/arch/arm/plat-mxc/include/mach/iomux-mx35.h @@ -671,7 +671,7 @@ #define MX35_PAD_LD8__SDMA_SDMA_DEBUG_PC_8 IOMUX_PAD(0x634, 0x1d0, 6, 0x0, 0, NO_PAD_CTRL) #define MX35_PAD_LD9__IPU_DISPB_DAT_9 IOMUX_PAD(0x638, 0x1d4, 0, 0x0, 0, NO_PAD_CTRL) -#define MX35_PAD_LD9__GPIO2_9 IOMUX_PAD(0x638, 0x1d4, 5, 0x8e4 0, NO_PAD_CTRL) +#define MX35_PAD_LD9__GPIO2_9 IOMUX_PAD(0x638, 0x1d4, 5, 0x8e4, 0, NO_PAD_CTRL) #define MX35_PAD_LD9__SDMA_SDMA_DEBUG_PC_9 IOMUX_PAD(0x638, 0x1d4, 6, 0x0, 0, NO_PAD_CTRL) #define MX35_PAD_LD10__IPU_DISPB_DAT_10 IOMUX_PAD(0x63c, 0x1d8, 0, 0x0, 0, NO_PAD_CTRL) From 
4c574159d03f4d8a136a7adff2d0b1d82cadcb18 Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Wed, 27 Jan 2010 21:05:55 -0200 Subject: [PATCH 152/640] tools/perf/perf.c: Clean up trivial style issues Checked with: ./../scripts/checkpatch.pl --terse --file perf.c perf.c: 51: ERROR: open brace '{' following function declarations go on the next line perf.c: 73: ERROR: "foo*** bar" should be "foo ***bar" perf.c:112: ERROR: space prohibited before that close parenthesis ')' perf.c:127: ERROR: space prohibited before that close parenthesis ')' perf.c:171: ERROR: "foo** bar" should be "foo **bar" perf.c:213: ERROR: "(foo*)" should be "(foo *)" perf.c:216: ERROR: "(foo*)" should be "(foo *)" perf.c:217: ERROR: space required before that '*' (ctx:OxV) perf.c:452: ERROR: do not initialise statics to 0 or NULL perf.c:453: ERROR: do not initialise statics to 0 or NULL Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Masami Hiramatsu LKML-Reference: <1264633557-17597-7-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/perf.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 05c861c045d5..109b89b30ced 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -48,7 +48,8 @@ int check_pager_config(const char *cmd) return c.val; } -static void commit_pager_choice(void) { +static void commit_pager_choice(void) +{ switch (use_pager) { case 0: setenv("PERF_PAGER", "cat", 1); @@ -70,7 +71,7 @@ static void set_debugfs_path(void) "tracing/events"); } -static int handle_options(const char*** argv, int* argc, int* envchanged) +static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -109,7 +110,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--perf-dir")) { if (*argc < 2) { - fprintf(stderr, "No directory given for 
--perf-dir.\n" ); + fprintf(stderr, "No directory given for --perf-dir.\n"); usage(perf_usage_string); } setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); @@ -124,7 +125,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--work-tree")) { if (*argc < 2) { - fprintf(stderr, "No directory given for --work-tree.\n" ); + fprintf(stderr, "No directory given for --work-tree.\n"); usage(perf_usage_string); } setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); @@ -168,7 +169,7 @@ static int handle_alias(int *argcp, const char ***argv) { int envchanged = 0, ret = 0, saved_errno = errno; int count, option_count; - const char** new_argv; + const char **new_argv; const char *alias_command; char *alias_string; @@ -210,11 +211,11 @@ static int handle_alias(int *argcp, const char ***argv) if (!strcmp(alias_command, new_argv[0])) die("recursive alias: %s", alias_command); - new_argv = realloc(new_argv, sizeof(char*) * + new_argv = realloc(new_argv, sizeof(char *) * (count + *argcp + 1)); /* insert after command name */ - memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); - new_argv[count+*argcp] = NULL; + memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp); + new_argv[count + *argcp] = NULL; *argv = new_argv; *argcp += count - 1; @@ -450,8 +451,8 @@ int main(int argc, const char **argv) setup_path(); while (1) { - static int done_help = 0; - static int was_alias = 0; + static int done_help; + static int was_alias; was_alias = run_argv(&argc, &argv); if (errno != ENOENT) From 6a1b751fb89b61ef7240f2e3ed65a2e2776e7cfd Mon Sep 17 00:00:00 2001 From: John Kacur Date: Wed, 27 Jan 2010 21:05:54 -0200 Subject: [PATCH 153/640] perf: Ignore perf-archive temp file Tell git to ignore perf-archive. 
Signed-off-by: John Kacur Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1264633557-17597-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 124760bb37b5..e1d60d780784 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -14,6 +14,7 @@ perf*.html common-cmds.h perf.data perf.data.old +perf-archive tags TAGS cscope* From 12b336a8b4ea8652372f88521bf55cef7f5d5283 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Jan 2010 20:43:21 +0000 Subject: [PATCH 154/640] wm97xx_battery: Handle missing platform data gracefully Don't unconditionally dereference the WM97xx core platform data since it may not be present, causing an oops. Signed-off-by: Mark Brown Signed-off-by: Anton Vorontsov --- drivers/power/wm97xx_battery.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c index fa39e759a275..6ea3cb5837c7 100644 --- a/drivers/power/wm97xx_battery.c +++ b/drivers/power/wm97xx_battery.c @@ -175,8 +175,14 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev) dev_err(&dev->dev, "Do not pass platform_data through " "wm97xx_bat_set_pdata!\n"); return -EINVAL; - } else - pdata = wmdata->batt_pdata; + } + + if (!wmdata) { + dev_err(&dev->dev, "No platform data supplied\n"); + return -EINVAL; + } + + pdata = wmdata->batt_pdata; if (dev->id != -1) return -EINVAL; From 7415c7602ddb14a9a9c39bc8c38afa667092a527 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 29 Jan 2010 06:51:08 -0300 Subject: [PATCH 155/640] V4L/DVB: saa7134: remove stray unlock_kernel An earlier commit removed the lock_kernel/unlock_kernel pair but forgot to remove the unlock_kernel call in the cleanup path at the end of the function. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/saa7134/saa7134-empress.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index 7dfecfc6017c..ee5bff02a92c 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -93,9 +93,9 @@ static int ts_open(struct file *file) dprintk("open dev=%s\n", video_device_node_name(vdev)); err = -EBUSY; if (!mutex_trylock(&dev->empress_tsq.vb_lock)) - goto done; + return err; if (atomic_read(&dev->empress_users)) - goto done_up; + goto done; /* Unmute audio */ saa_writeb(SAA7134_AUDIO_MUTE_CTRL, @@ -105,10 +105,8 @@ static int ts_open(struct file *file) file->private_data = dev; err = 0; -done_up: - mutex_unlock(&dev->empress_tsq.vb_lock); done: - unlock_kernel(); + mutex_unlock(&dev->empress_tsq.vb_lock); return err; } From 9198bcd39f558dd56823f1c9983e2252fc99a501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Jan 2010 14:20:05 -0800 Subject: [PATCH 156/640] omap: define _toggle_gpio_edge_triggering only for OMAP1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only usage of _toggle_gpio_edge_triggering is in an #ifdef CONFIG_ARCH_OMAP1 block, so only provide it if CONFIG_ARCH_OMAP1 is defined, too. This fixes a compiler warning: arch/arm/plat-omap/gpio.c:758: warning: '_toggle_gpio_edge_triggering' defined but not used when compiling for ARCH_OMAP2, ARCH_OMAP3 or ARCH_OMAP4. 
Signed-off-by: Uwe Kleine-König Acked-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index d17620c50c28..d2422c766cca 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -750,6 +750,7 @@ static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio, } #endif +#ifdef CONFIG_ARCH_OMAP1 /* * This only applies to chips that can't do both rising and falling edge * detection at once. For all other chips, this function is a noop. @@ -760,11 +761,9 @@ static void _toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio) u32 l = 0; switch (bank->method) { -#ifdef CONFIG_ARCH_OMAP1 case METHOD_MPUIO: reg += OMAP_MPUIO_GPIO_INT_EDGE; break; -#endif #ifdef CONFIG_ARCH_OMAP15XX case METHOD_GPIO_1510: reg += OMAP1510_GPIO_INT_CONTROL; @@ -787,6 +786,7 @@ static void _toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio) __raw_writel(l, reg); } +#endif static int _set_gpio_triggering(struct gpio_bank *bank, int gpio, int trigger) { From 643ced9b0b4810b5725910667604f1a373f30f2f Mon Sep 17 00:00:00 2001 From: Li Peng Date: Thu, 28 Jan 2010 01:05:09 +0800 Subject: [PATCH 157/640] drm/i915: don't trigger ironlake vblank interrupt at irq install Zhenyu noticed that the ironlake vblank enabling patch has one issue that it will trigger vblank starting from irq postinstall, this isn't necessary. This patch addresses this issue by only adding the vblank into DEIER but mask them in DEIMR, so that it won't trigger vblank interrupt at irq install. 
Signed-off-by: Li Peng Acked-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_irq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e7472d82132a..fcd87ad75fec 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1022,14 +1022,13 @@ static int ironlake_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; /* enable kind of interrupts always enabled */ - u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT | - DE_PIPEA_VBLANK | DE_PIPEB_VBLANK; + u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT; u32 render_mask = GT_USER_INTERRUPT; u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; dev_priv->irq_mask_reg = ~display_mask; - dev_priv->de_irq_enable_reg = display_mask; + dev_priv->de_irq_enable_reg = display_mask | DE_PIPEA_VBLANK | DE_PIPEB_VBLANK; /* should always can generate irq */ I915_WRITE(DEIIR, I915_READ(DEIIR)); From 2dd873838805a6e84c1afdfbf13e8709bfb5c70f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 27 Jan 2010 16:32:46 +0800 Subject: [PATCH 158/640] drm/i915: Add support for SDVO composite TV Signed-off-by: Zhao Yakui Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_sdvo.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index eaacfd0920df..82678d30ab06 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2345,6 +2345,14 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags) connector->connector_type = DRM_MODE_CONNECTOR_VGA; intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) | (1 << INTEL_ANALOG_CLONE_BIT); + } else if (flags & SDVO_OUTPUT_CVBS0) { + + sdvo_priv->controlled_output = SDVO_OUTPUT_CVBS0; + 
encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; + connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; + sdvo_priv->is_tv = true; + intel_output->needs_tv_clock = true; + intel_output->clone_mask = 1 << INTEL_SDVO_TV_CLONE_BIT; } else if (flags & SDVO_OUTPUT_LVDS0) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; From f034b12dbb5749b11e9390e15e93ffa87ece8038 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 21 Jan 2010 15:20:18 +0800 Subject: [PATCH 159/640] drm/i915: Fix the incorrect DMI string for Samsung SX20S laptop Signed-off-by: Zhao Yakui Reported-by: Philipp Kohlbecher Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_lvds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index aa74e59bec61..75a9772061cb 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -611,7 +611,7 @@ static const struct dmi_system_id bad_lid_status[] = { { .ident = "Samsung SX20S", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"), DMI_MATCH(DMI_BOARD_NAME, "SX20S"), }, }, From 013d5aa2bbb2ceacba7a0dad7f2a0eb20133323f Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 29 Jan 2010 11:18:31 -0800 Subject: [PATCH 160/640] drm/i915: page flip support for Ironlake This patch adds support for page flipping on Ironlake, which uses different interrupt bits for triggering flip submit IRQs. 
Signed-off-by: Jesse Barnes [anholt: hand-resolved for rebasing off of render power saving patch] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_irq.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fcd87ad75fec..50ddf4a95c5e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -309,11 +309,21 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev) if (de_iir & DE_GSE) ironlake_opregion_gse_intr(dev); - if (de_iir & DE_PIPEA_VBLANK) - drm_handle_vblank(dev, 0); + if (de_iir & DE_PLANEA_FLIP_DONE) + intel_prepare_page_flip(dev, 0); - if (de_iir & DE_PIPEB_VBLANK) + if (de_iir & DE_PLANEB_FLIP_DONE) + intel_prepare_page_flip(dev, 1); + + if (de_iir & DE_PIPEA_VBLANK) { + drm_handle_vblank(dev, 0); + intel_finish_page_flip(dev, 0); + } + + if (de_iir & DE_PIPEB_VBLANK) { drm_handle_vblank(dev, 1); + intel_finish_page_flip(dev, 1); + } /* check event from PCH */ if ((de_iir & DE_PCH_EVENT) && @@ -1022,7 +1032,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; /* enable kind of interrupts always enabled */ - u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT; + u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT | + DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE; u32 render_mask = GT_USER_INTERRUPT; u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; From df2e615a3b3a66d0731e3309e9731970a6c51268 Mon Sep 17 00:00:00 2001 From: Colin Tuckley Date: Fri, 29 Jan 2010 12:52:55 +0100 Subject: [PATCH 161/640] ARM: 5907/1: ARM: Fix the reset on the RealView PBX Development board Signed-off-by: Colin Tuckley Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mach-realview/realview_pbx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c index a21a4b395f73..d94857eb0690 100644 --- a/arch/arm/mach-realview/realview_pbx.c +++ b/arch/arm/mach-realview/realview_pbx.c @@ -334,8 +334,8 @@ static void realview_pbx_reset(char mode) * in the system FPGA */ __raw_writel(REALVIEW_SYS_LOCK_VAL, lock_ctrl); - __raw_writel(0x0000, reset_ctrl); - __raw_writel(0x0004, reset_ctrl); + __raw_writel(0x00F0, reset_ctrl); + __raw_writel(0x00F4, reset_ctrl); } static void __init realview_pbx_init(void) From c540b9ff0f8679ba924fac072aeb7d63fa473190 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 26 Jan 2010 19:09:48 +0100 Subject: [PATCH 162/640] ARM: 5904/1: ARM: Always generate the IT instruction when compiling for Thumb-2 Current behaviour is to generate the IT instruction only for Thumb-2 code. However, the kernel helpers in entry-armv.S are compiled to ARM in a unified syntax file (if THUMB2_KERNEL). Recent compilers warn about missing IT instruction in unified assembly syntax files. The patch changes the "-mimplicit-it" gas option to "always". 
Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 9e7582572741..356d702c0808 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -94,7 +94,7 @@ CFLAGS_ABI +=-funwind-tables endif ifeq ($(CONFIG_THUMB2_KERNEL),y) -AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=thumb,-Wa$(comma)-mauto-it) +AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_THUMB2 :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_THUMB2 :=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb From 3256a05531b1164a9c138da701b922a113bddf82 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 31 Jan 2010 12:39:50 +0900 Subject: [PATCH 163/640] nilfs2: fix potential leak of dirty data on umount This fixes incorrect usage of nilfs_segctor_confirm() test function in nilfs_segctor_destroy(); nilfs_segctor_confirm() returns zero if the filesystem is not clean, so its use in nilfs_segctor_destroy() needs inversion. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 17584c524486..105b508b47a8 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); - if (flag || nilfs_segctor_confirm(sci)) + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); WARN_ON(!list_empty(&sci->sc_copied_buffers)); From a8e6f734ce9a79d44ebb296f2a341f435227b34e Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 30 Jan 2010 20:55:41 +0900 Subject: [PATCH 164/640] Revert "perf record: Intercept all events" This reverts commit f5a2c3dce03621b55f84496f58adc2d1a87ca16f. 
This patch is required for making "perf lock rec" work. The commit f5a2c3dce0 changes write_event() of builtin-record.c. With that change, write_event() sometimes doesn't stop when running perf lock rec. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: [ that commit also causes perf record to not be Ctrl-C-able, and it's conceptually wrong to parse the data at record time (unconditionally - even when not needed), as we eventually want to be able to do zero-copy recording, at least for non-archive recordings. ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 90345223908c..eea56910b91c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -113,24 +113,16 @@ static void write_output(void *buf, size_t size) static void write_event(event_t *buf, size_t size) { - size_t processed_size = buf->header.size; - event_t *ev = buf; - - do { - /* - * Add it to the list of DSOs, so that when we finish this - * record session we can pick the available build-ids. - */ - if (ev->header.type == PERF_RECORD_MMAP) { - struct list_head *head = &dsos__user; - if (ev->header.misc == 1) - head = &dsos__kernel; - __dsos__findnew(head, ev->mmap.filename); - } - - ev = ((void *)ev) + ev->header.size; - processed_size += ev->header.size; - } while (processed_size < size); + /* + * Add it to the list of DSOs, so that when we finish this + * record session we can pick the available build-ids. 
+ */ + if (buf->header.type == PERF_RECORD_MMAP) { + struct list_head *head = &dsos__user; + if (buf->mmap.header.misc == 1) + head = &dsos__kernel; + __dsos__findnew(head, buf->mmap.filename); + } write_output(buf, size); } From 86d8d29634de4464d568e7c335c0da6cba64e8ab Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 30 Jan 2010 20:43:23 +0900 Subject: [PATCH 165/640] perf tools: Add __data_loc support This patch is required to test the next patch for perf lock. In commit 064739bc4b3d7f424b2f25547e6611bcf0132415, support for the "__data_loc" format modifier was added. But when I wanted to parse the format of lock_acquired (or some other event), raw_field_ptr() did not return the correct pointer. So I modified raw_field_ptr() as shown in this patch. With this change raw_field_ptr() works correctly. Signed-off-by: Hitoshi Mitake Acked-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Tom Zanussi Cc: Steven Rostedt LKML-Reference: <1264851813-8413-2-git-send-email-mitake@dcl.info.waseda.ac.jp> [ v3: fixed minor stylistic detail ] Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-parse.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c5c32be040bf..c4b3cb8a02b1 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1925,6 +1925,15 @@ void *raw_field_ptr(struct event *event, const char *name, void *data) if (!field) return NULL; + if (field->flags & FIELD_IS_STRING) { + int offset; + + offset = *(int *)(data + field->offset); + offset &= 0xffff; + + return data + offset; + } + return data + field->offset; } From 18e97e06b5fb2d7f6cf272ca07d26d8247db8723 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 30 Jan 2010 20:43:24 +0900 Subject: [PATCH 166/640] perf: Add util/include/linuxhash.h to include hash.h of kernel linux/hash.h, hash header of kernel, is also useful for perf. 
util/include/linuxhash.h includes linux/hash.h, so we can use hash facilities (e.g. hash_long()) in perf now. Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1264851813-8413-3-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/util/include/linux/hash.h | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tools/perf/util/include/linux/hash.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9b173e66fb41..b2bce1fb4ae1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -333,6 +333,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += util/include/linux/bitops.h diff --git a/tools/perf/util/include/linux/hash.h b/tools/perf/util/include/linux/hash.h new file mode 100644 index 000000000000..201f57397997 --- /dev/null +++ b/tools/perf/util/include/linux/hash.h @@ -0,0 +1,5 @@ +#include "../../../../include/linux/hash.h" + +#ifndef PERF_HASH_H +#define PERF_HASH_H +#endif From c965be10ca3cb0bdd04016c852764afaf8e647c8 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 30 Jan 2010 20:43:32 +0900 Subject: [PATCH 167/640] perf lock: Enhance information of lock trace events Add wait time and lock identification details. 
Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1264851813-8413-11-git-send-email-mitake@dcl.info.waseda.ac.jp> [ removed the file/line bits as we can do that better via IPs ] Signed-off-by: Ingo Molnar --- include/trace/events/lock.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index a870ba125aa8..5c1dcfc16c60 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -20,14 +20,17 @@ TRACE_EVENT(lock_acquire, TP_STRUCT__entry( __field(unsigned int, flags) __string(name, lock->name) + __field(void *, lockdep_addr) ), TP_fast_assign( __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); __assign_str(name, lock->name); + __entry->lockdep_addr = lock; ), - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + TP_printk("%p %s%s%s", __entry->lockdep_addr, + (__entry->flags & 1) ? "try " : "", (__entry->flags & 2) ? 
"read " : "", __get_str(name)) ); @@ -40,13 +43,16 @@ TRACE_EVENT(lock_release, TP_STRUCT__entry( __string(name, lock->name) + __field(void *, lockdep_addr) ), TP_fast_assign( __assign_str(name, lock->name); + __entry->lockdep_addr = lock; ), - TP_printk("%s", __get_str(name)) + TP_printk("%p %s", + __entry->lockdep_addr, __get_str(name)) ); #ifdef CONFIG_LOCK_STAT @@ -59,13 +65,16 @@ TRACE_EVENT(lock_contended, TP_STRUCT__entry( __string(name, lock->name) + __field(void *, lockdep_addr) ), TP_fast_assign( __assign_str(name, lock->name); + __entry->lockdep_addr = lock; ), - TP_printk("%s", __get_str(name)) + TP_printk("%p %s", + __entry->lockdep_addr, __get_str(name)) ); TRACE_EVENT(lock_acquired, @@ -75,16 +84,18 @@ TRACE_EVENT(lock_acquired, TP_STRUCT__entry( __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) + __field(s64, wait_nsec) + __field(void *, lockdep_addr) ), + TP_fast_assign( __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; + __entry->wait_nsec = waittime; + __entry->lockdep_addr = lock; ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) + TP_printk("%p %s (%llu ns)", __entry->lockdep_addr, + __get_str(name), + __entry->wait_nsec) ); #endif From 9b5e350c7a46a471d5b452836dbafe9aeaeca435 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sat, 30 Jan 2010 20:43:33 +0900 Subject: [PATCH 168/640] perf lock: Introduce new tool "perf lock", for analyzing lock statistics Adding new subcommand "perf lock" to perf. I have a lot of remaining ToDos, but for now perf lock can already provide minimal functionality for analyzing lock statistics. 
Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1264851813-8413-12-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-lock.c | 724 ++++++++++++++++++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/perf.c | 1 + 4 files changed, 727 insertions(+) create mode 100644 tools/perf/builtin-lock.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index b2bce1fb4ae1..42969303e20b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -456,6 +456,7 @@ BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o BUILTIN_OBJS += builtin-probe.o BUILTIN_OBJS += builtin-kmem.o +BUILTIN_OBJS += builtin-lock.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c new file mode 100644 index 000000000000..2b5f88754c26 --- /dev/null +++ b/tools/perf/builtin-lock.c @@ -0,0 +1,724 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" + +#include "util/debug.h" +#include "util/session.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* based on kernel/lockdep.c */ +#define LOCKHASH_BITS 12 +#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) + +static struct list_head lockhash_table[LOCKHASH_SIZE]; + +#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) +#define lockhashentry(key) (lockhash_table + __lockhashfn((key))) + +#define LOCK_STATE_UNLOCKED 0 /* initial state */ +#define LOCK_STATE_LOCKED 1 + +struct lock_stat { + struct list_head hash_entry; + struct rb_node rb; /* used for sorting */ + + /* FIXME: raw_field_value() returns unsigned long long, + * so address of lockdep_map should be dealed as 64bit. + * Is there more better solution? 
*/ + void *addr; /* address of lockdep_map, used as ID */ + char *name; /* for strcpy(), we cannot use const */ + char *file; + unsigned int line; + + int state; + u64 prev_event_time; /* timestamp of previous event */ + + unsigned int nr_acquired; + unsigned int nr_acquire; + unsigned int nr_contended; + unsigned int nr_release; + + /* these times are in nano sec. */ + u64 wait_time_total; + u64 wait_time_min; + u64 wait_time_max; +}; + +/* build simple key function one is bigger than two */ +#define SINGLE_KEY(member) \ + static int lock_stat_key_ ## member(struct lock_stat *one, \ + struct lock_stat *two) \ + { \ + return one->member > two->member; \ + } + +SINGLE_KEY(nr_acquired) +SINGLE_KEY(nr_contended) +SINGLE_KEY(wait_time_total) +SINGLE_KEY(wait_time_min) +SINGLE_KEY(wait_time_max) + +struct lock_key { + /* + * name: the value for specify by user + * this should be simpler than raw name of member + * e.g. nr_acquired -> acquired, wait_time_total -> wait_total + */ + const char *name; + int (*key)(struct lock_stat*, struct lock_stat*); +}; + +static const char *sort_key = "acquired"; +static int (*compare)(struct lock_stat *, struct lock_stat *); + +#define DEF_KEY_LOCK(name, fn_suffix) \ + { #name, lock_stat_key_ ## fn_suffix } +struct lock_key keys[] = { + DEF_KEY_LOCK(acquired, nr_acquired), + DEF_KEY_LOCK(contended, nr_contended), + DEF_KEY_LOCK(wait_total, wait_time_total), + DEF_KEY_LOCK(wait_min, wait_time_min), + DEF_KEY_LOCK(wait_max, wait_time_max), + + /* extra comparisons much complicated should be here */ + + { NULL, NULL } +}; + +static void select_key(void) +{ + int i; + + for (i = 0; keys[i].name; i++) { + if (!strcmp(keys[i].name, sort_key)) { + compare = keys[i].key; + return; + } + } + + die("Unknown compare key:%s\n", sort_key); +} + +static struct rb_root result; /* place to store sorted data */ + +static void insert_to_result(struct lock_stat *st, + int (*bigger)(struct lock_stat *, + struct lock_stat *)) +{ + struct rb_node **rb = 
&result.rb_node; + struct rb_node *parent = NULL; + struct lock_stat *p; + + while (*rb) { + p = container_of(*rb, struct lock_stat, rb); + parent = *rb; + + if (bigger(st, p)) + rb = &(*rb)->rb_left; + else + rb = &(*rb)->rb_right; + } + + rb_link_node(&st->rb, parent, rb); + rb_insert_color(&st->rb, &result); +} + +/* returns left most element of result, and erase it */ +static struct lock_stat *pop_from_result(void) +{ + struct rb_node *node = result.rb_node; + + if (!node) + return NULL; + + while (node->rb_left) + node = node->rb_left; + + rb_erase(node, &result); + return container_of(node, struct lock_stat, rb); +} + +static struct lock_stat *lock_stat_findnew(void *addr, const char *name, + const char *file, unsigned int line) +{ + struct list_head *entry = lockhashentry(addr); + struct lock_stat *ret, *new; + + list_for_each_entry(ret, entry, hash_entry) { + if (ret->addr == addr) + return ret; + } + + new = zalloc(sizeof(struct lock_stat)); + if (!new) + goto alloc_failed; + + new->addr = addr; + new->name = zalloc(sizeof(char) * strlen(name) + 1); + if (!new->name) + goto alloc_failed; + strcpy(new->name, name); + new->file = zalloc(sizeof(char) * strlen(file) + 1); + if (!new->file) + goto alloc_failed; + strcpy(new->file, file); + new->line = line; + + /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */ + new->state = LOCK_STATE_UNLOCKED; + new->wait_time_min = ULLONG_MAX; + + list_add(&new->hash_entry, entry); + return new; + +alloc_failed: + die("memory allocation failed\n"); +} + +static char const *input_name = "perf.data"; + +static int profile_cpu = -1; + +struct raw_event_sample { + u32 size; + char data[0]; +}; + +struct trace_acquire_event { + void *addr; + const char *name; + const char *file; + unsigned int line; +}; + +struct trace_acquired_event { + void *addr; + const char *name; + const char *file; + unsigned int line; +}; + +struct trace_contended_event { + void *addr; + const char *name; + const char *file; + unsigned int line; +}; 
+ +struct trace_release_event { + void *addr; + const char *name; + const char *file; + unsigned int line; +}; + +struct trace_lock_handler { + void (*acquire_event)(struct trace_acquire_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); + + void (*acquired_event)(struct trace_acquired_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); + + void (*contended_event)(struct trace_contended_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); + + void (*release_event)(struct trace_release_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); +}; + +static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct lock_stat *st; + + st = lock_stat_findnew(acquire_event->addr, acquire_event->name, + acquire_event->file, acquire_event->line); + + switch (st->state) { + case LOCK_STATE_UNLOCKED: + break; + case LOCK_STATE_LOCKED: + break; + default: + BUG_ON(1); + break; + } + + st->prev_event_time = timestamp; +} + +static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct lock_stat *st; + + st = lock_stat_findnew(acquired_event->addr, acquired_event->name, + acquired_event->file, acquired_event->line); + + switch (st->state) { + case LOCK_STATE_UNLOCKED: + st->state = LOCK_STATE_LOCKED; + st->nr_acquired++; + break; + case LOCK_STATE_LOCKED: + break; + default: + BUG_ON(1); + break; + } + + st->prev_event_time = timestamp; +} + +static void prof_lock_contended_event(struct trace_contended_event *contended_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct lock_stat *st; + + st = lock_stat_findnew(contended_event->addr, 
contended_event->name, + contended_event->file, contended_event->line); + + switch (st->state) { + case LOCK_STATE_UNLOCKED: + break; + case LOCK_STATE_LOCKED: + st->nr_contended++; + break; + default: + BUG_ON(1); + break; + } + + st->prev_event_time = timestamp; +} + +static void prof_lock_release_event(struct trace_release_event *release_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct lock_stat *st; + u64 hold_time; + + st = lock_stat_findnew(release_event->addr, release_event->name, + release_event->file, release_event->line); + + switch (st->state) { + case LOCK_STATE_UNLOCKED: + break; + case LOCK_STATE_LOCKED: + st->state = LOCK_STATE_UNLOCKED; + hold_time = timestamp - st->prev_event_time; + + if (timestamp < st->prev_event_time) { + /* terribly, this can happen... */ + goto end; + } + + if (st->wait_time_min > hold_time) + st->wait_time_min = hold_time; + if (st->wait_time_max < hold_time) + st->wait_time_max = hold_time; + st->wait_time_total += hold_time; + + st->nr_release++; + break; + default: + BUG_ON(1); + break; + } + +end: + st->prev_event_time = timestamp; +} + +/* lock oriented handlers */ +/* TODO: handlers for CPU oriented, thread oriented */ +static struct trace_lock_handler prof_lock_ops = { + .acquire_event = prof_lock_acquire_event, + .acquired_event = prof_lock_acquired_event, + .contended_event = prof_lock_contended_event, + .release_event = prof_lock_release_event, +}; + +static struct trace_lock_handler *trace_handler; + +static void +process_lock_acquire_event(void *data, + struct event *event __used, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_acquire_event acquire_event; + u64 tmp; /* this is required for casting... 
*/ + + tmp = raw_field_value(event, "lockdep_addr", data); + memcpy(&acquire_event.addr, &tmp, sizeof(void *)); + acquire_event.name = (char *)raw_field_ptr(event, "name", data); + acquire_event.file = (char *)raw_field_ptr(event, "file", data); + acquire_event.line = + (unsigned int)raw_field_value(event, "line", data); + + if (trace_handler->acquire_event) { + trace_handler->acquire_event(&acquire_event, + event, cpu, timestamp, thread); + } +} + +static void +process_lock_acquired_event(void *data, + struct event *event __used, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_acquired_event acquired_event; + u64 tmp; /* this is required for casting... */ + + tmp = raw_field_value(event, "lockdep_addr", data); + memcpy(&acquired_event.addr, &tmp, sizeof(void *)); + acquired_event.name = (char *)raw_field_ptr(event, "name", data); + acquired_event.file = (char *)raw_field_ptr(event, "file", data); + acquired_event.line = + (unsigned int)raw_field_value(event, "line", data); + + if (trace_handler->acquire_event) { + trace_handler->acquired_event(&acquired_event, + event, cpu, timestamp, thread); + } +} + +static void +process_lock_contended_event(void *data, + struct event *event __used, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_contended_event contended_event; + u64 tmp; /* this is required for casting... 
*/ + + tmp = raw_field_value(event, "lockdep_addr", data); + memcpy(&contended_event.addr, &tmp, sizeof(void *)); + contended_event.name = (char *)raw_field_ptr(event, "name", data); + contended_event.file = (char *)raw_field_ptr(event, "file", data); + contended_event.line = + (unsigned int)raw_field_value(event, "line", data); + + if (trace_handler->acquire_event) { + trace_handler->contended_event(&contended_event, + event, cpu, timestamp, thread); + } +} + +static void +process_lock_release_event(void *data, + struct event *event __used, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_release_event release_event; + u64 tmp; /* this is required for casting... */ + + tmp = raw_field_value(event, "lockdep_addr", data); + memcpy(&release_event.addr, &tmp, sizeof(void *)); + release_event.name = (char *)raw_field_ptr(event, "name", data); + release_event.file = (char *)raw_field_ptr(event, "file", data); + release_event.line = + (unsigned int)raw_field_value(event, "line", data); + + if (trace_handler->acquire_event) { + trace_handler->release_event(&release_event, + event, cpu, timestamp, thread); + } +} + +static void +process_raw_event(void *data, int cpu, + u64 timestamp, struct thread *thread) +{ + struct event *event; + int type; + + type = trace_parse_common_type(data); + event = trace_find_event(type); + + if (!strcmp(event->name, "lock_acquire")) + process_lock_acquire_event(data, event, cpu, timestamp, thread); + if (!strcmp(event->name, "lock_acquired")) + process_lock_acquired_event(data, event, cpu, timestamp, thread); + if (!strcmp(event->name, "lock_contended")) + process_lock_contended_event(data, event, cpu, timestamp, thread); + if (!strcmp(event->name, "lock_release")) + process_lock_release_event(data, event, cpu, timestamp, thread); +} + +static int process_sample_event(event_t *event, struct perf_session *session) +{ + struct thread *thread; + struct sample_data data; + + bzero(&data, sizeof(struct 
sample_data)); + event__parse_sample(event, session->sample_type, &data); + thread = perf_session__findnew(session, data.pid); + + /* + * FIXME: this causes warn on 32bit environment + * because of (void *)data.ip (type of data.ip is u64) + */ +/* dump_printf("(IP, %d): %d/%d: %p period: %llu\n", */ +/* event->header.misc, */ +/* data.pid, data.tid, (void *)data.ip, data.period); */ + + if (thread == NULL) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + if (profile_cpu != -1 && profile_cpu != (int) data.cpu) + return 0; + + process_raw_event(data.raw_data, data.cpu, data.time, thread); + + return 0; +} + +/* TODO: various way to print, coloring, nano or milli sec */ +static void print_result(void) +{ + struct lock_stat *st; + char cut_name[20]; + + printf("%18s ", "ID"); + printf("%20s ", "Name"); + printf("%10s ", "acquired"); + printf("%10s ", "contended"); + + printf("%15s ", "total wait (ns)"); + printf("%15s ", "max wait (ns)"); + printf("%15s ", "min wait (ns)"); + + printf("\n\n"); + + while ((st = pop_from_result())) { + bzero(cut_name, 20); + + printf("%p ", st->addr); + + if (strlen(st->name) < 16) { + /* output raw name */ + printf("%20s ", st->name); + } else { + strncpy(cut_name, st->name, 16); + cut_name[16] = '.'; + cut_name[17] = '.'; + cut_name[18] = '.'; + cut_name[19] = '\0'; + /* cut off name for saving output style */ + printf("%20s ", cut_name); + } + + printf("%10u ", st->nr_acquired); + printf("%10u ", st->nr_contended); + + printf("%15llu ", st->wait_time_total); + printf("%15llu ", st->wait_time_max); + printf("%15llu ", st->wait_time_min == ULLONG_MAX ? 
+ 0 : st->wait_time_min); + printf("\n"); + } +} + +static void dump_map(void) +{ + unsigned int i; + struct lock_stat *st; + + for (i = 0; i < LOCKHASH_SIZE; i++) { + list_for_each_entry(st, &lockhash_table[i], hash_entry) { + printf("%p: %s (src: %s, line: %u)\n", + st->addr, st->name, st->file, st->line); + } + } +} + +static struct perf_event_ops eops = { + .sample = process_sample_event, + .comm = event__process_comm, +}; + +static struct perf_session *session; + +static int read_events(void) +{ + session = perf_session__new(input_name, O_RDONLY, 0); + if (!session) + die("Initializing perf session failed\n"); + + return perf_session__process_events(session, &eops); +} + +static void sort_result(void) +{ + unsigned int i; + struct lock_stat *st; + + for (i = 0; i < LOCKHASH_SIZE; i++) { + list_for_each_entry(st, &lockhash_table[i], hash_entry) { + insert_to_result(st, compare); + } + } +} + +static void __cmd_prof(void) +{ + setup_pager(); + select_key(); + read_events(); + sort_result(); + print_result(); +} + +static const char * const prof_usage[] = { + "perf sched prof []", + NULL +}; + +static const struct option prof_options[] = { + OPT_STRING('k', "key", &sort_key, "acquired", + "key for sorting"), + /* TODO: type */ + OPT_END() +}; + +static const char * const lock_usage[] = { + "perf lock [] {record|trace|prof}", + NULL +}; + +static const struct option lock_options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() +}; + +static const char *record_args[] = { + "record", + "-a", + "-R", + "-M", + "-f", + "-m", "1024", + "-c", "1", + "-e", "lock:lock_acquire:r", + "-e", "lock:lock_acquired:r", + "-e", "lock:lock_contended:r", + "-e", "lock:lock_release:r", +}; + +static int __cmd_record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + 
const char **rec_argv; + + rec_argc = ARRAY_SIZE(record_args) + argc - 1; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = strdup(record_args[i]); + + for (j = 1; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + BUG_ON(i != rec_argc); + + return cmd_record(i, rec_argv, NULL); +} + +int cmd_lock(int argc, const char **argv, const char *prefix __used) +{ + unsigned int i; + + symbol__init(); + for (i = 0; i < LOCKHASH_SIZE; i++) + INIT_LIST_HEAD(lockhash_table + i); + + argc = parse_options(argc, argv, lock_options, lock_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + usage_with_options(lock_usage, lock_options); + + if (!strncmp(argv[0], "rec", 3)) { + return __cmd_record(argc, argv); + } else if (!strncmp(argv[0], "prof", 4)) { + trace_handler = &prof_lock_ops; + if (argc) { + argc = parse_options(argc, argv, + prof_options, prof_usage, 0); + if (argc) + usage_with_options(prof_usage, prof_options); + } + __cmd_prof(); + } else if (!strcmp(argv[0], "trace")) { + /* Aliased to 'perf trace' */ + return cmd_trace(argc, argv, prefix); + } else if (!strcmp(argv[0], "map")) { + /* recycling prof_lock_ops */ + trace_handler = &prof_lock_ops; + setup_pager(); + read_events(); + dump_map(); + } else { + usage_with_options(lock_usage, lock_options); + } + + return 0; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index dee97cfe3794..10fe49e7048a 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -31,5 +31,6 @@ extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_probe(int argc, const char **argv, const char *prefix); extern int cmd_kmem(int argc, const char **argv, const char *prefix); +extern int cmd_lock(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 109b89b30ced..57cb107c1f13 100644 --- 
a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -303,6 +303,7 @@ static void handle_internal_command(int argc, const char **argv) { "sched", cmd_sched, 0 }, { "probe", cmd_probe, 0 }, { "kmem", cmd_kmem, 0 }, + { "lock", cmd_lock, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; From 59f411b62c9282891274e721fea29026b0eda3cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 31 Jan 2010 08:27:58 +0100 Subject: [PATCH 169/640] perf lock: Clean up various details Fix up a few small stylistic details: - use consistent vertical spacing/alignment - remove line80 artifacts - group some global variables better - remove dead code Plus rename 'prof' to 'report' to make it more in line with other tools, and remove the line/file keying as we really want to use IPs like the other tools do. Signed-off-by: Ingo Molnar Cc: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1264851813-8413-12-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar --- tools/perf/builtin-lock.c | 210 +++++++++++++++----------------------- 1 file changed, 82 insertions(+), 128 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2b5f88754c26..fb9ab2ad3f92 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -32,37 +32,37 @@ static struct list_head lockhash_table[LOCKHASH_SIZE]; #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) -#define LOCK_STATE_UNLOCKED 0 /* initial state */ -#define LOCK_STATE_LOCKED 1 +#define LOCK_STATE_UNLOCKED 0 /* initial state */ +#define LOCK_STATE_LOCKED 1 struct lock_stat { - struct list_head hash_entry; - struct rb_node rb; /* used for sorting */ + struct list_head hash_entry; + struct rb_node rb; /* used for sorting */ - /* FIXME: raw_field_value() returns unsigned long long, + /* + * FIXME: raw_field_value() returns unsigned long long, * so address of 
lockdep_map should be dealed as 64bit. - * Is there more better solution? */ - void *addr; /* address of lockdep_map, used as ID */ - char *name; /* for strcpy(), we cannot use const */ - char *file; - unsigned int line; + * Is there more better solution? + */ + void *addr; /* address of lockdep_map, used as ID */ + char *name; /* for strcpy(), we cannot use const */ - int state; - u64 prev_event_time; /* timestamp of previous event */ + int state; + u64 prev_event_time; /* timestamp of previous event */ - unsigned int nr_acquired; - unsigned int nr_acquire; - unsigned int nr_contended; - unsigned int nr_release; + unsigned int nr_acquired; + unsigned int nr_acquire; + unsigned int nr_contended; + unsigned int nr_release; /* these times are in nano sec. */ - u64 wait_time_total; - u64 wait_time_min; - u64 wait_time_max; + u64 wait_time_total; + u64 wait_time_min; + u64 wait_time_max; }; /* build simple key function one is bigger than two */ -#define SINGLE_KEY(member) \ +#define SINGLE_KEY(member) \ static int lock_stat_key_ ## member(struct lock_stat *one, \ struct lock_stat *two) \ { \ @@ -81,12 +81,15 @@ struct lock_key { * this should be simpler than raw name of member * e.g. 
nr_acquired -> acquired, wait_time_total -> wait_total */ - const char *name; - int (*key)(struct lock_stat*, struct lock_stat*); + const char *name; + int (*key)(struct lock_stat*, struct lock_stat*); }; -static const char *sort_key = "acquired"; -static int (*compare)(struct lock_stat *, struct lock_stat *); +static const char *sort_key = "acquired"; + +static int (*compare)(struct lock_stat *, struct lock_stat *); + +static struct rb_root result; /* place to store sorted data */ #define DEF_KEY_LOCK(name, fn_suffix) \ { #name, lock_stat_key_ ## fn_suffix } @@ -116,11 +119,8 @@ static void select_key(void) die("Unknown compare key:%s\n", sort_key); } -static struct rb_root result; /* place to store sorted data */ - static void insert_to_result(struct lock_stat *st, - int (*bigger)(struct lock_stat *, - struct lock_stat *)) + int (*bigger)(struct lock_stat *, struct lock_stat *)) { struct rb_node **rb = &result.rb_node; struct rb_node *parent = NULL; @@ -155,8 +155,7 @@ static struct lock_stat *pop_from_result(void) return container_of(node, struct lock_stat, rb); } -static struct lock_stat *lock_stat_findnew(void *addr, const char *name, - const char *file, unsigned int line) +static struct lock_stat *lock_stat_findnew(void *addr, const char *name) { struct list_head *entry = lockhashentry(addr); struct lock_stat *ret, *new; @@ -175,11 +174,6 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name, if (!new->name) goto alloc_failed; strcpy(new->name, name); - new->file = zalloc(sizeof(char) * strlen(file) + 1); - if (!new->file) - goto alloc_failed; - strcpy(new->file, file); - new->line = line; /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */ new->state = LOCK_STATE_UNLOCKED; @@ -197,36 +191,28 @@ static char const *input_name = "perf.data"; static int profile_cpu = -1; struct raw_event_sample { - u32 size; - char data[0]; + u32 size; + char data[0]; }; struct trace_acquire_event { - void *addr; - const char *name; - const char *file; - 
unsigned int line; + void *addr; + const char *name; }; struct trace_acquired_event { - void *addr; - const char *name; - const char *file; - unsigned int line; + void *addr; + const char *name; }; struct trace_contended_event { - void *addr; - const char *name; - const char *file; - unsigned int line; + void *addr; + const char *name; }; struct trace_release_event { - void *addr; - const char *name; - const char *file; - unsigned int line; + void *addr; + const char *name; }; struct trace_lock_handler { @@ -255,7 +241,8 @@ struct trace_lock_handler { struct thread *thread); }; -static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event, +static void +report_lock_acquire_event(struct trace_acquire_event *acquire_event, struct event *__event __used, int cpu __used, u64 timestamp, @@ -263,8 +250,7 @@ static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event, { struct lock_stat *st; - st = lock_stat_findnew(acquire_event->addr, acquire_event->name, - acquire_event->file, acquire_event->line); + st = lock_stat_findnew(acquire_event->addr, acquire_event->name); switch (st->state) { case LOCK_STATE_UNLOCKED: @@ -279,7 +265,8 @@ static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event, st->prev_event_time = timestamp; } -static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event, +static void +report_lock_acquired_event(struct trace_acquired_event *acquired_event, struct event *__event __used, int cpu __used, u64 timestamp, @@ -287,8 +274,7 @@ static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event { struct lock_stat *st; - st = lock_stat_findnew(acquired_event->addr, acquired_event->name, - acquired_event->file, acquired_event->line); + st = lock_stat_findnew(acquired_event->addr, acquired_event->name); switch (st->state) { case LOCK_STATE_UNLOCKED: @@ -305,7 +291,8 @@ static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event 
st->prev_event_time = timestamp; } -static void prof_lock_contended_event(struct trace_contended_event *contended_event, +static void +report_lock_contended_event(struct trace_contended_event *contended_event, struct event *__event __used, int cpu __used, u64 timestamp, @@ -313,8 +300,7 @@ static void prof_lock_contended_event(struct trace_contended_event *contended_ev { struct lock_stat *st; - st = lock_stat_findnew(contended_event->addr, contended_event->name, - contended_event->file, contended_event->line); + st = lock_stat_findnew(contended_event->addr, contended_event->name); switch (st->state) { case LOCK_STATE_UNLOCKED: @@ -330,7 +316,8 @@ static void prof_lock_contended_event(struct trace_contended_event *contended_ev st->prev_event_time = timestamp; } -static void prof_lock_release_event(struct trace_release_event *release_event, +static void +report_lock_release_event(struct trace_release_event *release_event, struct event *__event __used, int cpu __used, u64 timestamp, @@ -339,8 +326,7 @@ static void prof_lock_release_event(struct trace_release_event *release_event, struct lock_stat *st; u64 hold_time; - st = lock_stat_findnew(release_event->addr, release_event->name, - release_event->file, release_event->line); + st = lock_stat_findnew(release_event->addr, release_event->name); switch (st->state) { case LOCK_STATE_UNLOCKED: @@ -373,11 +359,11 @@ end: /* lock oriented handlers */ /* TODO: handlers for CPU oriented, thread oriented */ -static struct trace_lock_handler prof_lock_ops = { - .acquire_event = prof_lock_acquire_event, - .acquired_event = prof_lock_acquired_event, - .contended_event = prof_lock_contended_event, - .release_event = prof_lock_release_event, +static struct trace_lock_handler report_lock_ops = { + .acquire_event = report_lock_acquire_event, + .acquired_event = report_lock_acquired_event, + .contended_event = report_lock_contended_event, + .release_event = report_lock_release_event, }; static struct trace_lock_handler *trace_handler; 
@@ -395,14 +381,9 @@ process_lock_acquire_event(void *data, tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&acquire_event.addr, &tmp, sizeof(void *)); acquire_event.name = (char *)raw_field_ptr(event, "name", data); - acquire_event.file = (char *)raw_field_ptr(event, "file", data); - acquire_event.line = - (unsigned int)raw_field_value(event, "line", data); - if (trace_handler->acquire_event) { - trace_handler->acquire_event(&acquire_event, - event, cpu, timestamp, thread); - } + if (trace_handler->acquire_event) + trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); } static void @@ -418,14 +399,9 @@ process_lock_acquired_event(void *data, tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&acquired_event.addr, &tmp, sizeof(void *)); acquired_event.name = (char *)raw_field_ptr(event, "name", data); - acquired_event.file = (char *)raw_field_ptr(event, "file", data); - acquired_event.line = - (unsigned int)raw_field_value(event, "line", data); - if (trace_handler->acquire_event) { - trace_handler->acquired_event(&acquired_event, - event, cpu, timestamp, thread); - } + if (trace_handler->acquire_event) + trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread); } static void @@ -441,14 +417,9 @@ process_lock_contended_event(void *data, tmp = raw_field_value(event, "lockdep_addr", data); memcpy(&contended_event.addr, &tmp, sizeof(void *)); contended_event.name = (char *)raw_field_ptr(event, "name", data); - contended_event.file = (char *)raw_field_ptr(event, "file", data); - contended_event.line = - (unsigned int)raw_field_value(event, "line", data); - if (trace_handler->acquire_event) { - trace_handler->contended_event(&contended_event, - event, cpu, timestamp, thread); - } + if (trace_handler->acquire_event) + trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread); } static void @@ -464,14 +435,9 @@ process_lock_release_event(void *data, tmp = raw_field_value(event, 
"lockdep_addr", data); memcpy(&release_event.addr, &tmp, sizeof(void *)); release_event.name = (char *)raw_field_ptr(event, "name", data); - release_event.file = (char *)raw_field_ptr(event, "file", data); - release_event.line = - (unsigned int)raw_field_value(event, "line", data); - if (trace_handler->acquire_event) { - trace_handler->release_event(&release_event, - event, cpu, timestamp, thread); - } + if (trace_handler->acquire_event) + trace_handler->release_event(&release_event, event, cpu, timestamp, thread); } static void @@ -503,14 +469,6 @@ static int process_sample_event(event_t *event, struct perf_session *session) event__parse_sample(event, session->sample_type, &data); thread = perf_session__findnew(session, data.pid); - /* - * FIXME: this causes warn on 32bit environment - * because of (void *)data.ip (type of data.ip is u64) - */ -/* dump_printf("(IP, %d): %d/%d: %p period: %llu\n", */ -/* event->header.misc, */ -/* data.pid, data.tid, (void *)data.ip, data.period); */ - if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); @@ -580,15 +538,14 @@ static void dump_map(void) for (i = 0; i < LOCKHASH_SIZE; i++) { list_for_each_entry(st, &lockhash_table[i], hash_entry) { - printf("%p: %s (src: %s, line: %u)\n", - st->addr, st->name, st->file, st->line); + printf("%p: %s\n", st->addr, st->name); } } } static struct perf_event_ops eops = { - .sample = process_sample_event, - .comm = event__process_comm, + .sample = process_sample_event, + .comm = event__process_comm, }; static struct perf_session *session; @@ -614,7 +571,7 @@ static void sort_result(void) } } -static void __cmd_prof(void) +static void __cmd_report(void) { setup_pager(); select_key(); @@ -623,12 +580,12 @@ static void __cmd_prof(void) print_result(); } -static const char * const prof_usage[] = { - "perf sched prof []", +static const char * const report_usage[] = { + "perf lock report []", NULL }; -static const struct option prof_options[] = { 
+static const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting"), /* TODO: type */ @@ -636,17 +593,14 @@ static const struct option prof_options[] = { }; static const char * const lock_usage[] = { - "perf lock [] {record|trace|prof}", + "perf lock [] {record|trace|report}", NULL }; static const struct option lock_options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_END() }; @@ -698,21 +652,21 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); - } else if (!strncmp(argv[0], "prof", 4)) { - trace_handler = &prof_lock_ops; + } else if (!strncmp(argv[0], "report", 6)) { + trace_handler = &report_lock_ops; if (argc) { argc = parse_options(argc, argv, - prof_options, prof_usage, 0); + report_options, report_usage, 0); if (argc) - usage_with_options(prof_usage, prof_options); + usage_with_options(report_usage, report_options); } - __cmd_prof(); + __cmd_report(); } else if (!strcmp(argv[0], "trace")) { /* Aliased to 'perf trace' */ return cmd_trace(argc, argv, prefix); } else if (!strcmp(argv[0], "map")) { - /* recycling prof_lock_ops */ - trace_handler = &prof_lock_ops; + /* recycling report_lock_ops */ + trace_handler = &report_lock_ops; setup_pager(); read_events(); dump_map(); From d2f6650a950dadd20667a04a9dc785f240d43695 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 29 Jan 2010 17:48:51 +0100 Subject: [PATCH 170/640] ACPI: Add NULL pointer check in acpi_bus_start If acpi_bus_add does not 
return a device and it's passed to acpi_bus_start, bad things will happen: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] acpi_bus_start+0x14/0x24 ... [] acpiphp_bus_add+0xba/0x130 [acpiphp] [] enable_device+0x132/0x2ff [acpiphp] [] acpiphp_enable_slot+0xb8/0x130 [acpiphp] [] handle_hotplug_event_func+0x87/0x190 [acpiphp] Next patch would make this NULL pointer check obsolete, but better having one more than one missing... Signed-off-by: Thomas Renninger Acked-by: Bjorn Helgaas CC: stable@kernel.org Signed-off-by: Len Brown --- drivers/acpi/scan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ff9f6226085d..8044583f3034 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1357,6 +1357,9 @@ int acpi_bus_start(struct acpi_device *device) { struct acpi_bus_ops ops; + if (!device) + return -EINVAL; + memset(&ops, 0, sizeof(ops)); ops.acpi_op_start = 1; From 7779688fc3d1ceddad84846a7b0affbe8e78ec6e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 29 Jan 2010 17:48:52 +0100 Subject: [PATCH 171/640] ACPI: acpi_bus_{scan,bus,add}: return -ENODEV if no device was found Callers (acpi_memhotplug.c, dock.c and others) check for the return value of acpi_bus_add() and assume a valid device was returned in case zero was returned. Thus return -ENODEV if no device was found in acpi_bus_scan and propagate this through acpi_bus_add and acpi_bus_start. Also remove a confusing comment in acpiphp_glue.c, acpi_bus_scan will and cannot invoke if acpi_bus_add returns no valid device. 
Signed-off-by: Thomas Renninger Acked-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/acpi/scan.c | 24 +++++++++++++++++++----- drivers/pci/hotplug/acpiphp_glue.c | 6 ------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 8044583f3034..3e009674f333 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1336,9 +1336,25 @@ static int acpi_bus_scan(acpi_handle handle, struct acpi_bus_ops *ops, if (child) *child = device; - return 0; + + if (device) + return 0; + else + return -ENODEV; } +/* + * acpi_bus_add and acpi_bus_start + * + * scan a given ACPI tree and (probably recently hot-plugged) + * create and add or starts found devices. + * + * If no devices were found -ENODEV is returned which does not + * mean that this is a real error, there just have been no suitable + * ACPI objects in the table trunk from which the kernel could create + * a device and add/start an appropriate driver. + */ + int acpi_bus_add(struct acpi_device **child, struct acpi_device *parent, acpi_handle handle, int type) @@ -1348,8 +1364,7 @@ acpi_bus_add(struct acpi_device **child, memset(&ops, 0, sizeof(ops)); ops.acpi_op_add = 1; - acpi_bus_scan(handle, &ops, child); - return 0; + return acpi_bus_scan(handle, &ops, child); } EXPORT_SYMBOL(acpi_bus_add); @@ -1363,8 +1378,7 @@ int acpi_bus_start(struct acpi_device *device) memset(&ops, 0, sizeof(ops)); ops.acpi_op_start = 1; - acpi_bus_scan(device->handle, &ops, NULL); - return 0; + return acpi_bus_scan(device->handle, &ops, NULL); } EXPORT_SYMBOL(acpi_bus_start); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 8e952fdab764..cb2fd01eddae 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -720,12 +720,6 @@ static int acpiphp_bus_add(struct acpiphp_func *func) -ret_val); goto acpiphp_bus_add_out; } - /* - * try to start anyway. 
We could have failed to add - * simply because this bus had previously been added - * on another add. Don't bother with the return value - * we just keep going. - */ ret_val = acpi_bus_start(device); acpiphp_bus_add_out: From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 27 Jan 2010 16:25:22 -0600 Subject: [PATCH 172/640] softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb resume When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets the time from hardware such as the TSC on x86. In this configuration kgdb will report a softlock warning message on resuming or detaching from a debug session. Sequence of events in the problem case: 1) "cpu sched clock" and "hardware time" are at 100 sec prior to a call to kgdb_handle_exception() 2) Debugger waits in kgdb_handle_exception() for 80 sec and on exit the following is called ... touch_softlockup_watchdog() --> __raw_get_cpu_var(touch_timestamp) = 0; 3) "cpu sched clock" = 100s (it was not updated, because the interrupt was disabled in kgdb) but the "hardware time" = 180 sec 4) The first timer interrupt after resuming from kgdb_handle_exception updates the watchdog from the "cpu sched clock" update_process_times() { ... run_local_timers() --> softlockup_tick() --> check (touch_timestamp == 0) (it is "YES" here, we have set "touch_timestamp = 0" at kgdb) --> __touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp" to "get_timestamp()" (Here, the "touch_timestamp" will still be set to 100s.) ... scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched clock" to "hardware time" = 180s) ... } 5) The Second timer interrupt handler appears to have a large jump and trips the softlockup warning. update_process_times() { ... run_local_timers() --> softlockup_tick() --> "cpu sched clock" - "touch_timestamp" = 180s-100s > 60s --> printk "soft lockup error messages" ... 
} note: ***(A) reset "touch_timestamp" to "get_timestamp(this_cpu)" Why is "touch_timestamp" 100 sec, instead of 180 sec? When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of get_timestamp() is: get_timestamp(this_cpu) -->cpu_clock(this_cpu) -->sched_clock_cpu(this_cpu) -->__update_sched_clock(sched_clock_data, now) The __update_sched_clock() function uses the GTOD tick value to create a window to normalize the "now" values. So if "now" value is too big for sched_clock_data, it will be ignored. The fix is to invoke sched_clock_tick() to update "cpu sched clock" in order to recover from this state. This is done by introducing the function touch_softlockup_watchdog_sync(). This allows kgdb to request that the sched clock is updated when the watchdog thread runs the first time after a resume from kgdb. [yong.zhang0@gmail.com: Use per cpu instead of an array] Signed-off-by: Jason Wessel Signed-off-by: Dongdong Deng Cc: kgdb-bugreport@lists.sourceforge.net Cc: peterz@infradead.org LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/kgdb.c | 6 +++--- kernel/softlockup.c | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..89232151a9d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, void __user *buffer, @@ -323,6 +324,9 @@ static inline void softlockup_tick(void) static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) +{ +} static inline void 
touch_all_softlockup_watchdogs(void) { } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e23514..87f2cc557553 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1450,7 +1450,7 @@ acquirelock: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1550,7 +1550,7 @@ kgdb_restore: } /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e27..0d4c7898ab80 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); +static DEFINE_PER_CPU(bool, softlock_touch_sync); static int __read_mostly did_panic; int __read_mostly softlockup_thresh = 60; @@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) } EXPORT_SYMBOL(touch_softlockup_watchdog); +void touch_softlockup_watchdog_sync(void) +{ + __raw_get_cpu_var(softlock_touch_sync) = true; + __raw_get_cpu_var(softlockup_touch_ts) = 0; +} + void touch_all_softlockup_watchdogs(void) { int cpu; @@ -118,6 +125,14 @@ void softlockup_tick(void) } if (touch_ts == 0) { + if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { + /* + * If the time stamp was touched atomically + * make sure the scheduler tick is up 
to date. + */ + per_cpu(softlock_touch_sync, this_cpu) = false; + sched_clock_tick(); + } __touch_softlockup_watchdog(); return; } From dbead405801c8d5aa1bc21ab6e2a47f060d47c06 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 1 Feb 2010 18:50:40 +0100 Subject: [PATCH 173/640] ARM: 5909/1: ARM: Correct the FPSCR bits setting when raising exceptions Commit c98929c07a removed the clearing of the FPSCR[31:28] bits from the vfp_raise_exceptions() function and the new bits are or'ed with the old FPSCR bits leading to unexpected results (the original commit was referring to the cumulative bits - FPSCR[4:0]). Reported-by: Tom Hameenanttila Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index f60a5400a25b..a63c4be99b36 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -197,10 +197,13 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ } /* - * Update the FPSCR with the additional exception flags. + * If any of the status flags are set, update the FPSCR. * Comparison instructions always return at least one of * these flags set. */ + if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) + fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); + fpscr |= exceptions; fmxr(FPSCR, fpscr); From 110f82d7a2e0ff5a17617a9672f1ccb7e44bc0c6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Mon, 18 Jan 2010 22:36:49 +0100 Subject: [PATCH 174/640] firewire: net: fix panic in fwnet_write_complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the transmit path of firewire-net (IPv4 over 1394), the following race condition may occur: - The networking soft IRQ inserts a datagram into the 1394 async request transmit DMA. 
- The 1394 async transmit completion tasklet runs to finish cleaning up (unlink datagram from list of pending ones, release skb and outbound 1394 transaction object) --- before the networking soft IRQ had a chance to proceed and add the datagram to the list of pending datagrams. This caused a panic in the 1394 async transmit completion tasklet when it dereferenced uninitialized list heads: http://bugzilla.kernel.org/show_bug.cgi?id=15077 The fix is to add checks in the tx soft IRQ and in the tasklet to determine which of these two is the last referrer to the transaction object. Then handle the cleanup of the object by the last referrer rather than assuming that the tasklet is always the last one. There is another similar race: Between said tasklet and fwnet_close, i.e. at ifdown. However, that race is much less likely to occur in practice and shall be fixed in a separate update. Reported-by: Илья Басин Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 53 +++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index cbaf420c36c5..2d3dc7ded0a9 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -893,20 +893,31 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, static struct kmem_cache *fwnet_packet_task_cache; +static void fwnet_free_ptask(struct fwnet_packet_task *ptask) +{ + dev_kfree_skb_any(ptask->skb); + kmem_cache_free(fwnet_packet_task_cache, ptask); +} + static int fwnet_send_packet(struct fwnet_packet_task *ptask); static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) { - struct fwnet_device *dev; + struct fwnet_device *dev = ptask->dev; unsigned long flags; - - dev = ptask->dev; + bool free; spin_lock_irqsave(&dev->lock, flags); - list_del(&ptask->pt_link); - spin_unlock_irqrestore(&dev->lock, flags); - ptask->outstanding_pkts--; /* FIXME access inside lock */ + ptask->outstanding_pkts--; + + /* Check
whether we or the networking TX soft-IRQ is last user. */ + free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); + + if (ptask->outstanding_pkts == 0) + list_del(&ptask->pt_link); + + spin_unlock_irqrestore(&dev->lock, flags); if (ptask->outstanding_pkts > 0) { u16 dg_size; @@ -951,10 +962,10 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) ptask->max_payload = skb->len + RFC2374_FRAG_HDR_SIZE; } fwnet_send_packet(ptask); - } else { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); } + + if (free) + fwnet_free_ptask(ptask); } static void fwnet_write_complete(struct fw_card *card, int rcode, @@ -977,6 +988,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) unsigned tx_len; struct rfc2734_header *bufhdr; unsigned long flags; + bool free; dev = ptask->dev; tx_len = ptask->max_payload; @@ -1022,12 +1034,16 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) generation, SCODE_100, 0ULL, ptask->skb->data, tx_len + 8, fwnet_write_complete, ptask); - /* FIXME race? */ spin_lock_irqsave(&dev->lock, flags); - list_add_tail(&ptask->pt_link, &dev->broadcasted_list); + + /* If the AT tasklet already ran, we may be last user. */ + free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); + if (!free) + list_add_tail(&ptask->pt_link, &dev->broadcasted_list); + spin_unlock_irqrestore(&dev->lock, flags); - return 0; + goto out; } fw_send_request(dev->card, &ptask->transaction, @@ -1035,12 +1051,19 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) ptask->generation, ptask->speed, ptask->fifo_addr, ptask->skb->data, tx_len, fwnet_write_complete, ptask); - /* FIXME race? */ spin_lock_irqsave(&dev->lock, flags); - list_add_tail(&ptask->pt_link, &dev->sent_list); + + /* If the AT tasklet already ran, we may be last user. 
*/ + free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); + if (!free) + list_add_tail(&ptask->pt_link, &dev->sent_list); + spin_unlock_irqrestore(&dev->lock, flags); dev->netdev->trans_start = jiffies; + out: + if (free) + fwnet_free_ptask(ptask); return 0; } @@ -1298,6 +1321,8 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) spin_unlock_irqrestore(&dev->lock, flags); ptask->max_payload = max_payload; + INIT_LIST_HEAD(&ptask->pt_link); + fwnet_send_packet(ptask); return NETDEV_TX_OK; From ba9e9f3c08a5b58c1ffacf0cc6fb703ab0fa55ff Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 1 Feb 2010 21:23:46 -0200 Subject: [PATCH 175/640] saa7146: stop DMA before de-allocating DMA scatter/gather page buffers Thanks-to: Hartmut for pointing me the problem and testing the fix. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/saa7146_video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c index becbaadb3b77..5ed75263340a 100644 --- a/drivers/media/common/saa7146_video.c +++ b/drivers/media/common/saa7146_video.c @@ -1333,9 +1333,9 @@ static void buffer_release(struct videobuf_queue *q, struct videobuf_buffer *vb) DEB_CAP(("vbuf:%p\n",vb)); - release_all_pagetables(dev, buf); - saa7146_dma_free(dev,q,buf); + + release_all_pagetables(dev, buf); } static struct videobuf_queue_ops video_qops = { From 4f48f8b7fd18c44f8478174f9925cc3c059c6ce4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 2 Feb 2010 15:32:09 +0800 Subject: [PATCH 176/640] tracing: Fix circular dead lock in stack trace When we cat /tracing/stack_trace, we may cause circular lock: sys_read() t_start() arch_spin_lock(&max_stack_lock); t_show() seq_printf(), vsnprintf() .... /* they are all trace-able, when they are traced, max_stack_lock may be required again. 
*/ The following script can trigger this circular dead lock very easy: #!/bin/bash echo 1 > /proc/sys/kernel/stack_tracer_enabled mount -t debugfs xxx /mnt > /dev/null 2>&1 ( # make check_stack() zealous to require max_stack_lock for ((; ;)) { echo 1 > /mnt/tracing/stack_max_size } ) & for ((; ;)) { cat /mnt/tracing/stack_trace > /dev/null } To fix this bug, we increase the percpu trace_active before require the lock. Reported-by: Li Zefan Signed-off-by: Lai Jiangshan LKML-Reference: <4B67D4F9.9080905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee30..f4bc9b27de5f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, unsigned long val, flags; char buf[64]; int ret; + int cpu; if (count >= sizeof(buf)) return -EINVAL; @@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return ret; local_irq_save(flags); + + /* + * In case we trace inside arch_spin_lock() or after (NMI), + * we will cause circular lock, so we also need to increase + * the percpu trace_active here. 
+ */ + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); *ptr = val; arch_spin_unlock(&max_stack_lock); + + per_cpu(trace_active, cpu)--; local_irq_restore(flags); return count; @@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { + int cpu; + local_irq_disable(); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); if (*pos == 0) @@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { + int cpu; + arch_spin_unlock(&max_stack_lock); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)--; + local_irq_enable(); } From adef477268ff5ddd0195611dc7e26d7a879fefe1 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Tue, 26 Jan 2010 10:26:06 +0100 Subject: [PATCH 177/640] dmaengine: fix memleak in dma_async_device_unregister While debugging a dma driver I noticed a memleak after unloading the driver module. Caught by kmemleak. Signed-off-by: Anatolij Gustschin Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6f51a0a7a8bb..e7a3230fb7d5 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -826,6 +826,7 @@ void dma_async_device_unregister(struct dma_device *device) chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); device_unregister(&chan->dev->device); + free_percpu(chan->local); } } EXPORT_SYMBOL(dma_async_device_unregister); From ab09809f2eee1dc2d8f8bea636e77d176ba6c648 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Feb 2010 14:38:12 -0800 Subject: [PATCH 178/640] x86, doc: Fix minor spelling error in arch/x86/mm/gup.c Fix minor spelling error in comment. No code change. 
Signed-off-by: Andy Shevchenko LKML-Reference: <201002022238.o12McDiF018720@imap1.linux-foundation.org> Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bca13cb..738e6593799d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) #else /* * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atoimcally, + * any locks. For this we would like to load the pointers atomically, * but that is not possible (without expensive cmpxchg8b) on PAE. What * we do have is the guarantee that a pte will only either go from not * present to present, or present to not present or both -- it will not From d622b89a2f58613a9c1407b22b02aecdd2187a7c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 30 Jan 2010 23:32:19 +0800 Subject: [PATCH 179/640] ocfs2: Fix memory overflow in cow_by_page. In ocfs2_duplicate_clusters_by_page, we calculate map_end by shifting page_index. But actually in case we meet with a large offset (say on an i686 box, where pgoff_t is only 32 bits and page_index=2056240), we will overflow. So change the type of page_index to loff_t.
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/refcounttree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 74db2be75dd6..5b64468de0b0 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2945,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; @@ -3170,7 +3170,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; From 0a1ea437d87af830786605813972e8e277992917 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 1 Feb 2010 17:05:33 +0800 Subject: [PATCH 180/640] ocfs2: Only bug out when page size is larger than cluster size. In CoW, we have to make sure that the page is already written out to the disk. So we have a BUG_ON(PageDirty(page)). In ppc platform we have pagesize=64K, so if the cs=4K, if the file have fragmented clusters, we will map the page many times. See this file as an example. Tree Depth: 0 Count: 19 Next Free Rec: 14 ## Offset Clusters Block# Flags 0 0 4 2164864 0x2 Refcounted 1 4 2 9302792 0x2 Refcounted ... We have to replace the extent recs one by one, so the page with index 0 will be mapped and dirtied twice. I'd like to leave the BUG_ON there while adding a check so that in case we meet with an error in other platforms, we can find it easily. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/refcounttree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 5b64468de0b0..8ae65c9c020c 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2957,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, page = grab_cache_page(mapping, page_index); - /* This page can't be dirtied before we CoW it out. */ - BUG_ON(PageDirty(page)); + /* + * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page + * can't be dirtied before we CoW it out. + */ + if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) + BUG_ON(PageDirty(page)); if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); From 60c486744c9a30ea60fa863e9587242dde2fe4bd Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 3 Feb 2010 09:56:04 +0800 Subject: [PATCH 181/640] ocfs2: Add parenthesis to wrap the check for O_DIRECT. Add parenthesis to wrap the check for O_DIRECT. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 65e9375d2fb3..558ce0312421 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2013,8 +2013,8 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); - if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) || - (file->f_flags & O_DIRECT && has_refcount)) { + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || + ((file->f_flags & O_DIRECT) && has_refcount)) { ret = filemap_fdatawrite_range(file->f_mapping, pos, pos + count - 1); if (ret < 0) From cd34edd8cf80b507bb84b3f0c2988fe05099ffb5 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 17:58:30 -0800 Subject: [PATCH 182/640] ocfs2/dlm: Handle EAGAIN for compatibility - v2 Mainline commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8 made the dlm_begin_reco_handler() return -EAGAIN instead of EAGAIN. As this error is transmitted over the wire, we want the receiver, dlm_send_begin_reco_message(), to understand both the older EAGAIN and the newer -EAGAIN, to allow rolling upgrade of the cluster nodes. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index cfb2ae9ab538..ad712211d4ea 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2639,7 +2639,13 @@ retry: "begin reco msg (%d)\n", dlm->name, nodenum, ret); ret = 0; } - if (ret == -EAGAIN) { + + /* + * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8, + * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN. + * We are handling both for compatibility reasons. 
+ */ + if (ret == -EAGAIN || ret == EAGAIN) { mlog(0, "%s: trying to start recovery of node " "%u, but node %u is waiting for last recovery " "to complete, backoff for a bit\n", dlm->name, From 34e6c59af06cbca07b1490ec0015ea2d303470d3 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 27 Jan 2010 10:21:52 +0800 Subject: [PATCH 183/640] ocfs2: Use compat_ptr in reflink_arguments. Although we use u64 to pass userspace pointers to the kernel to avoid compat_ioctl, it doesn't work in some ppc platform. So wrap them with compat_ptr and add compat_ioctl. The detailed discussion about compat_ptr can be found in thread http://lkml.org/lkml/2009/10/27/423. We indeed met with a bug when testing on ppc(-EFAULT is returned when using old_path). This patch try to fix this. I have tested in ppc64(with 32 bit reflink) and x86_64(with i686 reflink), both works. Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/ioctl.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 31fbb0619510..7d9d9c132cef 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -7,6 +7,7 @@ #include #include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) { + bool preserve; + struct reflink_arguments args; + struct inode *inode = file->f_path.dentry->d_inode; + switch (cmd) { case OCFS2_IOC32_GETFLAGS: cmd = OCFS2_IOC_GETFLAGS; @@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: - case OCFS2_IOC_REFLINK: break; + case OCFS2_IOC_REFLINK: + if (copy_from_user(&args, (struct reflink_arguments *)arg, + sizeof(args))) + return -EFAULT; + preserve = (args.preserve != 0); + + return ocfs2_reflink_ioctl(inode, 
compat_ptr(args.old_path), + compat_ptr(args.new_path), preserve); default: return -ENOIOCTLCMD; } From 7e55a70c5b9a57c12f49c44b0847c9343d4f54e4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 13 Jan 2010 13:33:12 -0700 Subject: [PATCH 184/640] ioat: fix infinite timeout checking in ioat2_quiesce Fix typo in ioat2_quiesce. check 'tmo' is zero, not 'end'. Also applies to 2.6.32.3 Cc: Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 5f7a500e18d0..5cc37afe2bc1 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -249,7 +249,7 @@ int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while (is_ioat_active(status) || is_ioat_idle(status)) { - if (end && time_after(jiffies, end)) { + if (tmo && time_after(jiffies, end)) { err = -ETIMEDOUT; break; } From 0b94a909eb2e2f6990d05fd486a0cb4902ef1ae7 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Thu, 21 Jan 2010 10:50:02 -0800 Subject: [PATCH 185/640] ocfs2: Fix setting of OCFS2_LOCK_BLOCKED during bast During bast, set the OCFS2_LOCK_BLOCKED flag only if the lock needs to be downconverted.
Signed-off-by: Wengang Wang Acked-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 172f4c6ce1be..0cdf63042b76 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -907,8 +907,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, assert_spin_locked(&lockres->l_lock); - lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); - if (level > lockres->l_blocking) { /* only schedule a downconvert if we haven't already scheduled * one that goes low enough to satisfy the level we're @@ -921,6 +919,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, lockres->l_blocking = level; } + if (needs_downconvert) + lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); + mlog_exit(needs_downconvert); return needs_downconvert; } From a19128260107f951d1b4c421cf98b92f8092b069 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 21 Jan 2010 10:50:03 -0800 Subject: [PATCH 186/640] ocfs2: Prevent a livelock in dlmglue There is possibility of a livelock in __ocfs2_cluster_lock(). If a node were to get an ast for an upconvert request, followed immediately by a bast, there is a small window where the fs may downconvert the lock before the process requesting the upconvert is able to take the lock. This patch adds a new flag to indicate that the upconvert is still in progress and that the dc thread should not downconvert it right now. Wengang Wang and Joel Becker contributed heavily to this patch. 
Reported-by: David Teigland Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 49 +++++++++++++++++++++++++++++++++++++++++++--- fs/ocfs2/ocfs2.h | 4 ++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0cdf63042b76..85d7c490755b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); lockres->l_level = lockres->l_requested; + + /* + * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing + * the OCFS2_LOCK_BUSY flag to prevent the dc thread from + * downconverting the lock before the upconvert has fully completed. + */ + lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); mlog_exit_void(); @@ -1134,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, mlog_entry_void(); spin_lock_irqsave(&lockres->l_lock, flags); lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); + lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); if (convert) lockres->l_action = OCFS2_AST_INVALID; else @@ -1324,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, again: wait = 0; + spin_lock_irqsave(&lockres->l_lock, flags); + if (catch_signals && signal_pending(current)) { ret = -ERESTARTSYS; - goto out; + goto unlock; } - spin_lock_irqsave(&lockres->l_lock, flags); - mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, "Cluster lock called on freeing lockres %s! flags " "0x%lx\n", lockres->l_name, lockres->l_flags); @@ -1347,6 +1356,25 @@ again: goto unlock; } + if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { + /* + * We've upconverted. If the lock now has a level we can + * work with, we take it. If, however, the lock is not at the + * required level, we go thru the full cycle. 
One way this could + * happen is if a process requesting an upconvert to PR is + * closely followed by another requesting upconvert to an EX. + * If the process requesting EX lands here, we want it to + * continue attempting to upconvert and let the process + * requesting PR take the lock. + * If multiple processes request upconvert to PR, the first one + * here will take the lock. The others will have to go thru the + * OCFS2_LOCK_BLOCKED check to ensure that there is no pending + * downconvert request. + */ + if (level <= lockres->l_level) + goto update_holders; + } + if (lockres->l_flags & OCFS2_LOCK_BLOCKED && !ocfs2_may_continue_on_blocked_lock(lockres, level)) { /* is the lock is currently blocked on behalf of @@ -1417,11 +1445,14 @@ again: goto again; } +update_holders: /* Ok, if we get here then we're good to go. */ ocfs2_inc_holders(lockres, level); ret = 0; unlock: + lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + spin_unlock_irqrestore(&lockres->l_lock, flags); out: /* @@ -3402,6 +3433,18 @@ recheck: goto leave; } + /* + * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is + * set when the ast is received for an upconvert just before the + * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast + * on the heels of the ast, we want to delay the downconvert just + * enough to allow the up requestor to do its task. Because this + * lock is in the blocked queue, the lock will be downconverted + * as soon as the requestor is done with the lock. + */ + if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) + goto leave_requeue; + /* if we're blocking an exclusive and we have *any* holders, * then requeue. */ if ((lockres->l_blocking == DLM_LOCK_EX) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9362eea7424b..740f448041e2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -136,6 +136,10 @@ enum ocfs2_unlock_action { #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a call to dlm_lock. 
Only exists with BUSY set. */ +#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread + * from downconverting + * before the upconvert + * has completed */ struct ocfs2_lock_res_ops; From 0d74125a6a68d4f1969ecaf0b3543f315916ccdc Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 29 Jan 2010 09:44:11 -0800 Subject: [PATCH 187/640] ocfs2: Do not downconvert if the lock level is already compatible During upconvert, if the master were to send a BAST, dlmglue will detect the upconversion in process and send a cancel convert to the master. Upon receiving the AST for the cancel convert, it will re-process the lock resource to determine whether it needs downconverting. Say, the up was from PR to EX and the BAST was for EX. After the cancel convert, it will need to downconvert to NL. However, if the node was originally upconverting from NL to EX, then there would be no reason to downconvert (assuming the same message sequence). This patch makes dlmglue consider the possibility that the current lock level is already compatible and that downconverting is not required. Joel Becker assisted in fixing this issue. Fixes ossbz#1178 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1178 Reported-by: Coly Li Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 85d7c490755b..ac24f49ae2fb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3445,6 +3445,19 @@ recheck: if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) goto leave_requeue; + /* + * How can we block and yet be at NL? We were trying to upconvert + * from NL and got canceled. The code comes back here, and now + * we notice and clear BLOCKING. 
+ */ + if (lockres->l_level == DLM_LOCK_NL) { + BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); + lockres->l_blocking = DLM_LOCK_NL; + lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); + spin_unlock_irqrestore(&lockres->l_lock, flags); + goto leave; + } + /* if we're blocking an exclusive and we have *any* holders, * then requeue. */ if ((lockres->l_blocking == DLM_LOCK_EX) From db0f6ce69776370232431eb8be85a5b18b0019c0 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 1 Feb 2010 16:55:50 -0800 Subject: [PATCH 188/640] ocfs2: Remove overzealous BUG_ON during blocked lock processing During blocked lock processing, we should consider the possibility that the lock is no longer blocking. Joel Becker assisted in fixing this issue. Reported-by: David Teigland Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ac24f49ae2fb..ce8e061c9a22 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3392,9 +3392,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, spin_lock_irqsave(&lockres->l_lock, flags); - BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); - recheck: + /* + * Is it still blocking? If not, we have no more work to do. + */ + if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { + BUG_ON(lockres->l_blocking != DLM_LOCK_NL); + spin_unlock_irqrestore(&lockres->l_lock, flags); + ret = 0; + goto leave; + } + if (lockres->l_flags & OCFS2_LOCK_BUSY) { /* XXX * This is a *big* race. The OCFS2_LOCK_PENDING flag From b8f46c5a34fa64fd456295388d18f50ae69d9f37 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 3 Feb 2010 11:53:14 +0800 Subject: [PATCH 189/640] perf tools: Use O_LARGEFILE to open perf data file Open perf data file with O_LARGEFILE flag since its size is easily larger than 2G.
For example: # rm -rf perf.data # ./perf kmem record sleep 300 [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 3142.147 MB perf.data (~137282513 samples) ] # ll -h perf.data -rw------- 1 root root 3.1G ..... Signed-off-by: Xiao Guangrong Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <4B68F32A.9040203@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++++- tools/perf/util/header.c | 22 +++++++++++++--------- tools/perf/util/session.c | 5 ++++- tools/perf/util/trace-event-read.c | 4 ++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index eea56910b91c..949167efa1ed 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -5,6 +5,9 @@ * (or a CPU, or a PID) into the perf.data output file - for * later analysis via perf report. */ +#define _LARGEFILE64_SOURCE +#define _FILE_OFFSET_BITS 64 + #include "builtin.h" #include "perf.h" @@ -451,7 +454,7 @@ static int __cmd_record(int argc, const char **argv) append_file = 0; } - flags = O_CREAT|O_RDWR; + flags = O_CREAT|O_RDWR|O_LARGEFILE; if (append_file) file_new = 0; else diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2bb2bdb1f456..ed3efd728b41 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,3 +1,6 @@ +#define _LARGEFILE64_SOURCE +#define _FILE_OFFSET_BITS 64 + #include #include #include @@ -382,7 +385,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) sec_size = sizeof(*feat_sec) * nr_sections; sec_start = self->data_offset + self->data_size; - lseek(fd, sec_start + sec_size, SEEK_SET); + lseek64(fd, sec_start + sec_size, SEEK_SET); if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { struct perf_file_section *trace_sec; @@ -390,9 +393,9 @@ static int perf_header__adds_write(struct perf_header *self, int fd) trace_sec = &feat_sec[idx++]; /* 
Write trace info */ - trace_sec->offset = lseek(fd, 0, SEEK_CUR); + trace_sec->offset = lseek64(fd, 0, SEEK_CUR); read_tracing_data(fd, attrs, nr_counters); - trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; + trace_sec->size = lseek64(fd, 0, SEEK_CUR) - trace_sec->offset; } @@ -402,17 +405,18 @@ static int perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; /* Write build-ids */ - buildid_sec->offset = lseek(fd, 0, SEEK_CUR); + buildid_sec->offset = lseek64(fd, 0, SEEK_CUR); err = dsos__write_buildid_table(fd); if (err < 0) { pr_debug("failed to write buildid table\n"); goto out_free; } - buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; + buildid_sec->size = lseek64(fd, 0, SEEK_CUR) - + buildid_sec->offset; dsos__cache_build_ids(); } - lseek(fd, sec_start, SEEK_SET); + lseek64(fd, sec_start, SEEK_SET); err = do_write(fd, feat_sec, sec_size); if (err < 0) pr_debug("failed to write feature section\n"); @@ -506,7 +510,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) pr_debug("failed to write perf header\n"); return err; } - lseek(fd, self->data_offset + self->data_size, SEEK_SET); + lseek64(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; return 0; @@ -560,7 +564,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, sec_size = sizeof(*feat_sec) * nr_sections; - lseek(fd, self->data_offset + self->data_size, SEEK_SET); + lseek64(fd, self->data_offset + self->data_size, SEEK_SET); if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) goto out_free; @@ -634,7 +638,7 @@ static int perf_file_section__process(struct perf_file_section *self, struct perf_header *ph, int feat, int fd) { - if (lseek(fd, self->offset, SEEK_SET) < 0) { + if (lseek64(fd, self->offset, SEEK_SET) < 0) { pr_debug("Failed to lseek to %Ld offset for feature %d, " "continuing...\n", self->offset, feat); return 0; diff --git a/tools/perf/util/session.c 
b/tools/perf/util/session.c index 8e7c1896eaa2..cf91d099f0aa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,3 +1,6 @@ +#define _LARGEFILE64_SOURCE +#define _FILE_OFFSET_BITS 64 + #include #include @@ -12,7 +15,7 @@ static int perf_session__open(struct perf_session *self, bool force) { struct stat input_stat; - self->fd = open(self->filename, O_RDONLY); + self->fd = open(self->filename, O_RDONLY|O_LARGEFILE); if (self->fd < 0) { pr_err("failed to open file: %s", self->filename); if (!strcmp(self->filename, "perf.data")) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1744422cafcb..ca3c26d466f3 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -83,7 +83,7 @@ static char *read_string(void) char *str = NULL; int size = 0; int i; - int r; + s64 r; for (;;) { r = read(input_fd, buf, BUFSIZ); @@ -117,7 +117,7 @@ static char *read_string(void) i++; /* move the file descriptor to the end of the string */ - r = lseek(input_fd, -(r - i), SEEK_CUR); + r = lseek64(input_fd, -(r - i), SEEK_CUR); if (r < 0) die("lseek"); From 7823860ca2904d6325eb636b77768f3e8183c861 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 3 Feb 2010 10:18:20 +0100 Subject: [PATCH 190/640] microblaze: Defconfig update There were several changes in the Microblaze defconfig, which is why it is good to update the defconfigs.
Signed-off-by: Michal Simek --- arch/microblaze/configs/mmu_defconfig | 112 ++++++++++++++++++------ arch/microblaze/configs/nommu_defconfig | 101 +++++++++++++++++---- 2 files changed, 169 insertions(+), 44 deletions(-) diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index bb7c374713ad..6fced1fe3bf0 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31 -# Thu Sep 24 10:28:50 2009 +# Linux kernel version: 2.6.33-rc6 +# Wed Feb 3 10:02:59 2010 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -19,8 +19,12 @@ CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_GENERIC_GPIO=y CONFIG_GENERIC_CSUM=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y # CONFIG_PCI is not set CONFIG_NO_DMA=y +CONFIG_DTC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_CONSTRUCTORS=y @@ -44,6 +48,7 @@ CONFIG_SYSVIPC_SYSCTL=y # CONFIG_TREE_RCU=y # CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_TINY_RCU is not set # CONFIG_RCU_TRACE is not set CONFIG_RCU_FANOUT=32 # CONFIG_RCU_FANOUT_EXACT is not set @@ -64,10 +69,12 @@ CONFIG_INITRAMFS_ROOT_GID=0 CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set +# CONFIG_RD_LZO is not set # CONFIG_INITRAMFS_COMPRESSION_NONE is not set CONFIG_INITRAMFS_COMPRESSION_GZIP=y # CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set # CONFIG_INITRAMFS_COMPRESSION_LZMA is not set +# CONFIG_INITRAMFS_COMPRESSION_LZO is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -90,21 +97,20 @@ CONFIG_EVENTFD=y CONFIG_AIO=y # -# Performance Counters +# Kernel Performance Events And Counters # CONFIG_VM_EVENT_COUNTERS=y -# CONFIG_STRIP_ASM_SYMS is not set CONFIG_COMPAT_BRK=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set 
-# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y # # GCOV-based kernel profiling # -# CONFIG_SLOW_WORK is not set +CONFIG_SLOW_WORK=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_BASE_SMALL=1 @@ -123,14 +129,41 @@ CONFIG_LBDAF=y # IO Schedulers # CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +# CONFIG_INLINE_SPIN_UNLOCK is not set +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +# CONFIG_INLINE_READ_UNLOCK is not set +# CONFIG_INLINE_READ_UNLOCK_BH is not set +# CONFIG_INLINE_READ_UNLOCK_IRQ is not set +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +# CONFIG_INLINE_WRITE_UNLOCK is not set +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +# CONFIG_MUTEX_SPIN_ON_OWNER is not set # CONFIG_FREEZER is not set # @@ -139,11 +172,6 @@ CONFIG_DEFAULT_IOSCHED="cfq" CONFIG_PLATFORM_GENERIC=y CONFIG_OPT_LIB_FUNCTION=y CONFIG_OPT_LIB_ASM=y -CONFIG_ALLOW_EDIT_AUTO=y - -# -# Automatic platform settings from 
Kconfig.auto -# # # Definitions for MICROBLAZE0 @@ -203,12 +231,11 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_SPLIT_PTLOCK_CPUS=999999 # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y -CONFIG_HAVE_MLOCK=y -CONFIG_HAVE_MLOCKED_PAGE_BIT=y +# CONFIG_KSM is not set CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # @@ -289,7 +316,13 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_WIRELESS is not set +CONFIG_WIRELESS=y +# CONFIG_CFG80211 is not set +# CONFIG_LIB80211 is not set + +# +# CFG80211 needs to be enabled for MAC80211 +# # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -313,6 +346,10 @@ CONFIG_OF_DEVICE=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set # CONFIG_BLK_DEV_LOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# # CONFIG_BLK_DEV_NBD is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 @@ -349,7 +386,6 @@ CONFIG_NETDEVICES=y # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y # CONFIG_MII is not set -# CONFIG_ETHOC is not set # CONFIG_DNET is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set @@ -359,12 +395,12 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +# CONFIG_KS8851_MLL is not set CONFIG_XILINX_EMACLITE=y CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y CONFIG_WLAN=y -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +# CONFIG_HOSTAP is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers @@ -408,6 +444,7 @@ CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set CONFIG_UNIX98_PTYS=y # CONFIG_DEVPTS_MULTIPLE_INSTANCES is 
not set CONFIG_LEGACY_PTYS=y @@ -433,7 +470,6 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y # CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set # CONFIG_THERMAL is not set -# CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set # @@ -526,8 +562,6 @@ CONFIG_PROC_FS=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y -CONFIG_TMPFS=y -# CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set # CONFIG_CONFIGFS_FS is not set CONFIG_MISC_FILESYSTEMS=y @@ -638,11 +672,13 @@ CONFIG_NLS_DEFAULT="iso8859-1" # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=1024 # CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set @@ -662,6 +698,9 @@ CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SLAB_LEAK is not set CONFIG_DEBUG_SPINLOCK=y # CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set @@ -680,10 +719,29 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set # CONFIG_SYSCTL_SYSCALL_CHECK is not set # CONFIG_PAGE_POISONING is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_ENABLE_DEFAULT_TRACERS is not set +# CONFIG_BOOT_TRACER is not set +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# 
CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_SAMPLES is not set -# CONFIG_KMEMCHECK is not set CONFIG_EARLY_PRINTK=y # CONFIG_HEART_BEAT is not set CONFIG_DEBUG_BOOTMEM=y @@ -694,7 +752,11 @@ CONFIG_DEBUG_BOOTMEM=y # CONFIG_KEYS is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_SMACK is not set +# CONFIG_DEFAULT_SECURITY_TOMOYO is not set +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" CONFIG_CRYPTO=y # diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index adb839bab704..ce2da535246a 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31 -# Thu Sep 24 10:29:43 2009 +# Linux kernel version: 2.6.33-rc6 +# Wed Feb 3 10:03:21 2010 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -19,8 +19,12 @@ CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_GENERIC_GPIO=y CONFIG_GENERIC_CSUM=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y # CONFIG_PCI is not set CONFIG_NO_DMA=y +CONFIG_DTC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_CONSTRUCTORS=y @@ -46,6 +50,7 @@ CONFIG_BSD_PROCESS_ACCT_V3=y # CONFIG_TREE_RCU=y # CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_TINY_RCU is not set # CONFIG_RCU_TRACE is not set CONFIG_RCU_FANOUT=32 # CONFIG_RCU_FANOUT_EXACT is not set @@ -81,16 +86,16 @@ CONFIG_EVENTFD=y CONFIG_AIO=y # -# Performance Counters +# Kernel Performance Events And Counters # CONFIG_VM_EVENT_COUNTERS=y -# CONFIG_STRIP_ASM_SYMS is not set CONFIG_COMPAT_BRK=y CONFIG_SLAB=y # CONFIG_SLUB is not set # 
CONFIG_SLOB is not set +# CONFIG_MMAP_ALLOW_UNINITIALIZED is not set # CONFIG_PROFILING is not set -# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y # # GCOV-based kernel profiling @@ -116,14 +121,41 @@ CONFIG_LBDAF=y # IO Schedulers # CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +CONFIG_INLINE_SPIN_UNLOCK=y +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +CONFIG_INLINE_READ_UNLOCK=y +# CONFIG_INLINE_READ_UNLOCK_BH is not set +CONFIG_INLINE_READ_UNLOCK_IRQ=y +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +CONFIG_INLINE_WRITE_UNLOCK=y +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +# CONFIG_MUTEX_SPIN_ON_OWNER is not set # CONFIG_FREEZER is not set # @@ -132,7 +164,10 @@ CONFIG_DEFAULT_IOSCHED="cfq" CONFIG_PLATFORM_GENERIC=y # CONFIG_SELFMOD is not set # CONFIG_OPT_LIB_FUNCTION is not set -# CONFIG_ALLOW_EDIT_AUTO is not set + +# +# Definitions for MICROBLAZE0 +# CONFIG_KERNEL_BASE_ADDR=0x90000000 CONFIG_XILINX_MICROBLAZE0_FAMILY="virtex5" 
CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1 @@ -190,7 +225,6 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_NOMMU_INITIAL_TRIM_EXCESS=1 # @@ -274,9 +308,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_AF_RXRPC is not set CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set -CONFIG_CFG80211_DEFAULT_PS_VALUE=0 -CONFIG_WIRELESS_OLD_REGULATORY=y -# CONFIG_WIRELESS_EXT is not set # CONFIG_LIB80211 is not set # @@ -301,9 +332,9 @@ CONFIG_STANDALONE=y # CONFIG_CONNECTOR is not set CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_TESTS is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y -# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_OF_PARTS is not set @@ -387,6 +418,10 @@ CONFIG_OF_DEVICE=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set # CONFIG_BLK_DEV_LOOP is not set + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 @@ -423,7 +458,6 @@ CONFIG_NETDEVICES=y # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y # CONFIG_MII is not set -# CONFIG_ETHOC is not set # CONFIG_DNET is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set @@ -433,12 +467,12 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +# CONFIG_KS8851_MLL is not set # CONFIG_XILINX_EMACLITE is not set CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y CONFIG_WLAN=y -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +# CONFIG_HOSTAP is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers @@ -482,6 +516,7 @@ CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set CONFIG_UNIX98_PTYS=y # 
CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y @@ -508,7 +543,6 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y # CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set # CONFIG_THERMAL is not set -# CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set # @@ -616,7 +650,6 @@ CONFIG_INOTIFY_USER=y CONFIG_PROC_FS=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y -# CONFIG_TMPFS is not set # CONFIG_HUGETLB_PAGE is not set # CONFIG_CONFIGFS_FS is not set CONFIG_MISC_FILESYSTEMS=y @@ -672,11 +705,13 @@ CONFIG_MSDOS_PARTITION=y # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=1024 # CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set @@ -695,12 +730,16 @@ CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y CONFIG_DEBUG_OBJECTS_FREE=y CONFIG_DEBUG_OBJECTS_TIMERS=y +# CONFIG_DEBUG_OBJECTS_WORK is not set CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set @@ -720,8 +759,28 @@ CONFIG_DEBUG_SG=y # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_PAGE_POISONING is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER 
is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_ENABLE_DEFAULT_TRACERS is not set +# CONFIG_BOOT_TRACER is not set +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_EARLY_PRINTK=y @@ -734,7 +793,11 @@ CONFIG_EARLY_PRINTK=y # CONFIG_KEYS is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_SMACK is not set +# CONFIG_DEFAULT_SECURITY_TOMOYO is not set +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" CONFIG_CRYPTO=y # From e402746a945ceb9d0486a8e3d5917c9228fa4404 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 25 Jan 2010 11:20:19 +0000 Subject: [PATCH 191/640] GFS2: Wait for unlock completion on umount This patch adds a wait on umount between the point at which we dispose of all glocks and the point at which we unmount the lock protocol. This ensures that we've received all the replies to our unlock requests before we stop the locking. Signed-off-by: Steven Whitehouse Reported-by: Fabio M. 
Di Nitto --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lock_dlm.c | 7 ++++++- fs/gfs2/ops_fstype.c | 2 ++ fs/gfs2/super.c | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 4792200978c8..bc0ad158e6b4 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -544,6 +544,8 @@ struct gfs2_sbd { struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_trans_gl; + wait_queue_head_t sd_glock_wait; + atomic_t sd_glock_disposal; /* Inode Stuff */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 46df988323bc..cdd0755d7823 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -21,6 +21,7 @@ static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + struct gfs2_sbd *sdp = gl->gl_sbd; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -30,6 +31,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ kmem_cache_free(gfs2_glock_cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret |= LM_OUT_CANCELED; @@ -167,7 +170,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) { struct gfs2_glock *gl = ptr; - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 0) { @@ -183,6 +187,7 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) (unsigned long long)gl->gl_name.ln_number, error); return; } + atomic_inc(&sdp->sd_glock_disposal); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index edfee24f3636..9390fc7d8d40 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -82,6 +82,8 @@ static struct gfs2_sbd 
*init_sbd(struct super_block *sb) gfs2_tune_init(&sdp->sd_tune); + init_waitqueue_head(&sdp->sd_glock_wait); + atomic_set(&sdp->sd_glock_disposal, 0); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c282ad41f3d1..66242b32db5b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -860,6 +861,8 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + /* Wait for dlm to reply to all our unlock requests */ + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); From 8f05228ee7c8f409ae3c6f9c3e13d7ccb9c18360 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 29 Jan 2010 15:21:27 +0000 Subject: [PATCH 192/640] GFS2: Extend umount wait coverage to full glock lifetime Although all glocks are, by the time of the umount glock wait, scheduled for demotion, some of them haven't made it far enough through the process for the original set of waiting code to wait for them. This extends the ref count to the whole glock lifetime in order to ensure that the waiting does catch all glocks. It does make it a bit more invasive, but it seems the only sensible solution at the moment. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++++ fs/gfs2/glock.h | 2 +- fs/gfs2/lock_dlm.c | 6 +++--- fs/gfs2/ops_fstype.c | 10 +++++++++- fs/gfs2/super.c | 2 -- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f455a03a09e2..f42663325931 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!gl) return -ENOMEM; + atomic_inc(&sdp->sd_glock_disposal); gl->gl_flags = 0; gl->gl_name = name; atomic_set(&gl->gl_ref, 1); @@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) up_write(&gfs2_umount_flush_sem); msleep(10); } + flush_workqueue(glock_workqueue); + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); + gfs2_dump_lockstate(sdp); } void gfs2_glock_finish_truncate(struct gfs2_inode *ip) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 13f0bd228132..c0262faf4725 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -123,7 +123,7 @@ struct lm_lockops { int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); - void (*lm_put_lock) (struct kmem_cache *cachep, void *gl); + void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); unsigned int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index cdd0755d7823..0e5e0e7022e5 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -167,15 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, return LM_OUT_ASYNC; } -static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) +static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) { - struct gfs2_glock *gl = ptr; struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 
0) { kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; } @@ -187,7 +188,6 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) (unsigned long long)gl->gl_name.ln_number, error); return; } - atomic_inc(&sdp->sd_glock_disposal); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9390fc7d8d40..8a102f731003 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -985,9 +985,17 @@ static const match_table_t nolock_tokens = { { Opt_err, NULL }, }; +static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); +} + static const struct lm_lockops nolock_ops = { .lm_proto_name = "lock_nolock", - .lm_put_lock = kmem_cache_free, + .lm_put_lock = nolock_put_lock, .lm_tokens = &nolock_tokens, }; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 66242b32db5b..b9dd3da22c0a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -861,8 +861,6 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); - /* Wait for dlm to reply to all our unlock requests */ - wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); From 58424a49cb99c4ad9386b47f885b352476313a02 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Mon, 1 Feb 2010 06:34:45 -0600 Subject: [PATCH 193/640] microblaze: fix interrupt state restore Interrupts must be disabled while an interrupt state restore (prep for interrupt return) is in progress. Code to do this was lost in the port to the mainline kernel. Signed-off-by: Steven J. 
Magnani Signed-off-by: Michal Simek --- arch/microblaze/kernel/entry-nommu.S | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S index 95b0855802df..391d6197fc3b 100644 --- a/arch/microblaze/kernel/entry-nommu.S +++ b/arch/microblaze/kernel/entry-nommu.S @@ -122,7 +122,7 @@ ENTRY(_interrupt) ret_from_intr: lwi r11, r1, PT_MODE - bneid r11, 3f + bneid r11, no_intr_resched lwi r6, r31, TS_THREAD_INFO /* get thread info */ lwi r19, r6, TI_FLAGS /* get flags in thread info */ @@ -133,16 +133,18 @@ ret_from_intr: bralid r15, schedule nop 1: andi r11, r19, _TIF_SIGPENDING - beqid r11, no_intr_reshed + beqid r11, no_intr_resched addk r5, r1, r0 addk r7, r0, r0 bralid r15, do_signal addk r6, r0, r0 -no_intr_reshed: +no_intr_resched: + /* Disable interrupts, we are now committed to the state restore */ + disable_irq + /* save mode indicator */ lwi r11, r1, PT_MODE -3: swi r11, r0, PER_CPU(KM) /* save r31 */ From 9f557cd8073104b39528794d44e129331ded649f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Feb 2010 08:27:22 -0500 Subject: [PATCH 194/640] NFS: Fix an Oops when truncating a file The VM/VFS does not allow mapping->a_ops->invalidatepage() to fail. Unfortunately, nfs_wb_page_cancel() may fail if a fatal signal occurs. Since the NFS code assumes that the page stays mapped for as long as the writeback is active, we can end up Oopsing (among other things). The only safe fix here is to convert nfs_wait_on_request(), so as to make it uninterruptible (as is already the case with wait_on_page_writeback()). 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/pagelist.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e2975939126a..a12c45b65dd4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. @@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - int ret = 0; - - if (!test_bit(PG_BUSY, &req->wb_flags)) - goto out; - ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_killable, TASK_KILLABLE); -out: - return ret; + return wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); } /** From 387c149b54b4321cbc790dadbd4f8eedb5a90468 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Feb 2010 08:27:35 -0500 Subject: [PATCH 195/640] NFS: Fix a umount race Ensure that we unregister the bdi before kill_anon_super() calls ida_remove() on our device name. 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/super.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce907efc5508..f1afee4eea77 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_put_super(struct super_block *); static void nfs_kill_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); @@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, @@ -2257,6 +2260,17 @@ error_splat_super: goto out; } +/* + * Ensure that we unregister the bdi before kill_anon_super + * releases the device name + */ +static void nfs_put_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + bdi_unregister(&server->backing_dev_info); +} + /* * Destroy an NFS2/3 superblock */ @@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s) struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - bdi_unregister(&server->backing_dev_info); nfs_fscache_release_super_cookie(s); nfs_free_server(server); } From 9b4b351346b41d923d69adec865814fdaac4dba9 Mon Sep 17 00:00:00 2001 From: Trond 
Myklebust Date: Wed, 3 Feb 2010 08:27:35 -0500 Subject: [PATCH 196/640] NFS: Don't clobber the attribute type in nfs_update_inode() If the NFS_ATTR_FATTR_TYPE field isn't set in fattr->valid, then we should not set the S_IFMT part of inode->i_mode. Reported-by: Al Viro Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index faa091865ad0..f141bde7756a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + umode_t newmode = inode->i_mode & S_IFMT; + newmode |= fattr->mode & S_IALLUGO; + inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - inode->i_mode = fattr->mode; } } else if (server->caps & NFS_CAP_MODE) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR From 5ecb01cfdf96c5f465192bdb2a4fd4a61a24c6cc Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Sat, 23 Jan 2010 22:36:29 +0100 Subject: [PATCH 197/640] futex_lock_pi() key refcnt fix This fixes a futex key reference count bug in futex_lock_pi(), where a key's reference count is incremented twice but decremented only once, causing the backing object to not be released. If the futex is created in a temporary file in an ext3 file system, this bug causes the file's inode to become an "undead" orphan, which causes an oops from a BUG_ON() in ext3_put_super() when the file system is unmounted. glibc's test suite is known to trigger this, see . The bug is a regression from 2.6.28-git3, namely Peter Zijlstra's 38d47c1b7075bd7ec3881141bb3629da58f88dab "[PATCH] futex: rely on get_user_pages() for shared futexes". 
That commit made get_futex_key() also increment the reference count of the futex key, and updated its callers to decrement the key's reference count before returning. Unfortunately the normal exit path in futex_lock_pi() wasn't corrected: the reference count is incremented by get_futex_key() and queue_lock(), but the normal exit path only decrements once, via unqueue_me_pi(). The fix is to put_futex_key() after unqueue_me_pi(), since 2.6.31 this is easily done by 'goto out_put_key' rather than 'goto out'. Signed-off-by: Mikael Pettersson Acked-by: Peter Zijlstra Acked-by: Darren Hart Signed-off-by: Thomas Gleixner Cc: --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index d9b3a2228f9d..17828033a639 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1971,7 +1971,7 @@ retry_private: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - goto out; + goto out_put_key; out_unlock_put_key: queue_unlock(&q, hb); From 51246bfd189064079c54421507236fd2723b18f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 2 Feb 2010 11:40:27 +0100 Subject: [PATCH 198/640] futex: Handle user space corruption gracefully If the owner of a PI futex dies we fix up the pi_state and set pi_state->owner to NULL. When a malicious or just sloppily programmed user space application sets the futex value to 0 e.g. by calling pthread_mutex_init(), then the futex can be acquired again. A new waiter manages to enqueue itself on the pi_state w/o damage, but on unlock the kernel dereferences pi_state->owner and oopses. Prevent this by checking pi_state->owner in the unlock path. If pi_state->owner is not current we know that user space manipulated the futex value. Ignore the mess and return -EINVAL. This catches the above case and also the case where a task hijacks the futex by setting the tid value and then tries to unlock it.
Reported-by: Jermome Marchand Signed-off-by: Thomas Gleixner Acked-by: Darren Hart Acked-by: Peter Zijlstra Cc: --- kernel/futex.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 17828033a639..06e8240d2abe 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -758,6 +758,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!pi_state) return -EINVAL; + /* + * If current does not own the pi_state then the futex is + * inconsistent and user space fiddled with the futex value. + */ + if (pi_state->owner != current) + return -EINVAL; + raw_spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); From 59647b6ac3050dd964bc556fe6ef22f4db5b935c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Feb 2010 09:33:05 +0100 Subject: [PATCH 199/640] futex: Handle futex value corruption gracefully The WARN_ON in lookup_pi_state which complains about a mismatch between pi_state->owner->pid and the pid which we retrieved from the user space futex is completely bogus. The code just emits the warning and then continues despite the fact that it detected an inconsistent state of the futex. A convenient way for user space to spam the syslog. Replace the WARN_ON by a consistency check. If the values do not match return -EINVAL and let user space deal with the mess it created. This also fixes the missing task_pid_vnr() when we compare the pi_state->owner pid with the futex value.
Reported-by: Jermome Marchand Signed-off-by: Thomas Gleixner Acked-by: Darren Hart Acked-by: Peter Zijlstra Cc: --- kernel/futex.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 06e8240d2abe..e7a35f1039e7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return -EINVAL; WARN_ON(!atomic_read(&pi_state->refcount)); - WARN_ON(pid && pi_state->owner && - pi_state->owner->pid != pid); + + /* + * When pi_state->owner is NULL then the owner died + * and another waiter is on the fly. pi_state->owner + * is fixed up by the task which acquires + * pi_state->rt_mutex. + * + * We do not check for pid == 0 which can happen when + * the owner died and robust_list_exit() cleared the + * TID. + */ + if (pid && pi_state->owner) { + /* + * Bail out if user space manipulated the + * futex value. + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + } atomic_inc(&pi_state->refcount); *ps = pi_state; From 4aba098c8d64329f0c4b24d12e1dc5398dd41a75 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Feb 2010 15:48:03 +0000 Subject: [PATCH 200/640] ARM: Fix wrong register in proc-arm6_7.S data abort handler Signed-off-by: Russell King --- arch/arm/mm/proc-arm6_7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S index 3f9cd3d8f6d5..795dc615f43b 100644 --- a/arch/arm/mm/proc-arm6_7.S +++ b/arch/arm/mm/proc-arm6_7.S @@ -41,7 +41,7 @@ ENTRY(cpu_arm7_dcache_clean_area) ENTRY(cpu_arm7_data_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r0] @ read arm instruction + ldr r8, [r2] @ read arm instruction tst r8, #1 << 20 @ L = 0 -> write? orreq r1, r1, #1 << 11 @ yes. 
and r7, r8, #15 << 24 From faccbcfb63af006e100d5b3b513131fe27aa66ab Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 29 Jan 2010 14:20:05 -0800 Subject: [PATCH 201/640] omap: Remove old unused defines for OMAP_32KSYNCT_BASE Remove old unused defines for OMAP_32KSYNCT_BASE Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/omap_device.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index 1e5648d3e3d8..2ed72013c2e2 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -89,16 +89,6 @@ #define USE_WAKEUP_LAT 0 #define IGNORE_WAKEUP_LAT 1 -/* XXX this should be moved into a separate file */ -#if defined(CONFIG_ARCH_OMAP2420) -# define OMAP_32KSYNCT_BASE 0x48004000 -#elif defined(CONFIG_ARCH_OMAP2430) -# define OMAP_32KSYNCT_BASE 0x49020000 -#elif defined(CONFIG_ARCH_OMAP3430) -# define OMAP_32KSYNCT_BASE 0x48320000 -#else -# error Unknown OMAP device -#endif /* Private functions */ From 9af915da20bd405c232ebb93c3cb80c6d92a12f6 Mon Sep 17 00:00:00 2001 From: Sriram Date: Fri, 29 Jan 2010 14:20:05 -0800 Subject: [PATCH 202/640] ARCH OMAP : enable ARCH_HAS_HOLES_MEMORYMODEL for OMAP OMAP platforms(like OMAP3530) include DSP or other co-processors for media acceleration. when carving out memory for the accelerators we can end up creating a hole in the memory map of sort: To handle such a memory configuration ARCH_HAS_HOLES_MEMORYMODEL has to be enabled. For further information refer discussion at: http://www.mail-archive.com/linux-omap@vger.kernel.org/msg15262.html. 
Signed-off-by: Sriramakrishnan Signed-off-by: Tony Lindgren --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4c33ca82f9b1..184a6bd54825 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -702,6 +702,7 @@ config ARCH_OMAP select ARCH_HAS_CPUFREQ select GENERIC_TIME select GENERIC_CLOCKEVENTS + select ARCH_HAS_HOLES_MEMORYMODEL help Support for TI's OMAP platform (OMAP1 and OMAP2). From 74005a2b116203f618fe784d88ad7e6071bb1554 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 29 Jan 2010 14:20:06 -0800 Subject: [PATCH 203/640] OMAP2/3: IRQ: ensure valid base address Ensure valid base address during IRQ init. Fixes compiler warning about potential use of uninitialized variable. Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c index 27054025da2b..26aeef560aa3 100644 --- a/arch/arm/mach-omap2/irq.c +++ b/arch/arm/mach-omap2/irq.c @@ -194,7 +194,7 @@ void __init omap_init_irq(void) int i; for (i = 0; i < ARRAY_SIZE(irq_banks); i++) { - unsigned long base; + unsigned long base = 0; struct omap_irq_bank *bank = irq_banks + i; if (cpu_is_omap24xx()) @@ -202,6 +202,8 @@ void __init omap_init_irq(void) else if (cpu_is_omap34xx()) base = OMAP34XX_IC_BASE; + BUG_ON(!base); + /* Static mapping, never released */ bank->base_reg = ioremap(base, SZ_4K); if (!bank->base_reg) { From 8d08436d782d177747a0fac1e1455a44b932b7c6 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 29 Jan 2010 14:20:06 -0800 Subject: [PATCH 204/640] OMAP2/3: GPMC: ensure valid clock pointer Ensure valid clock pointer during GPMC init. Fixes compiler warning about potential use of uninitialized variable. 
Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 3f1334f62e7a..7027cdc1ba49 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -505,7 +505,7 @@ static void __init gpmc_mem_init(void) void __init gpmc_init(void) { u32 l; - char *ck; + char *ck = NULL; if (cpu_is_omap24xx()) { ck = "core_l3_ck"; @@ -521,6 +521,9 @@ void __init gpmc_init(void) l = OMAP44XX_GPMC_BASE; } + if (WARN_ON(!ck)) + return; + gpmc_l3_clk = clk_get(NULL, ck); if (IS_ERR(gpmc_l3_clk)) { printk(KERN_ERR "Could not get GPMC clock %s\n", ck); From 9ecef433204f9b06550dd45cf84f14706f8fe4f0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Feb 2010 11:22:54 -0800 Subject: [PATCH 205/640] omap: Fix 3630 mux errors 3630 has more mux signals than 34xx. The additional pins exist in omap36xx_cbp_subset, but are not initialized as the superset is missing these offsets. This causes the following errors during the boot: mux: Unknown entry offset 0x236 mux: Unknown entry offset 0x22e mux: Unknown entry offset 0x1ec mux: Unknown entry offset 0x1ee mux: Unknown entry offset 0x1f4 mux: Unknown entry offset 0x1f6 mux: Unknown entry offset 0x1f8 mux: Unknown entry offset 0x1fa mux: Unknown entry offset 0x1fc mux: Unknown entry offset 0x22a mux: Unknown entry offset 0x226 mux: Unknown entry offset 0x230 mux: Unknown entry offset 0x22c mux: Unknown entry offset 0x228 Fix this by adding the missing offsets to omap3 superset. Note that additionally the uninitialized pins need to be skipped on 34xx. Based on an earlier patch by Allen Pais . 
Reported-by: Allen Pais Signed-off-by: Allen Pais Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mux.c | 7 ++++++ arch/arm/mach-omap2/mux34xx.c | 47 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 3f59bd12cbbf..19001dd8dd7e 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -968,6 +968,13 @@ static void __init omap_mux_init_list(struct omap_mux *superset) } #endif +#if defined(CONFIG_OMAP_MUX) && defined(CONFIG_DEBUG_FS) + if (!superset->muxnames || !superset->muxnames[0]) { + superset++; + continue; + } +#endif + entry = omap_mux_list_add(superset); if (!entry) { printk(KERN_ERR "mux: Could not add entry\n"); diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c index 68e0a595f9a1..07aa7b3c95f7 100644 --- a/arch/arm/mach-omap2/mux34xx.c +++ b/arch/arm/mach-omap2/mux34xx.c @@ -649,6 +649,53 @@ static struct omap_mux __initdata omap3_muxmodes[] = { _OMAP3_MUXENTRY(UART3_TX_IRTX, 166, "uart3_tx_irtx", NULL, NULL, NULL, "gpio_166", NULL, NULL, "safe_mode"), + + /* Only on 3630, see omap36xx_cbp_subset for the signals */ + _OMAP3_MUXENTRY(GPMC_A11, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MBUSFLAG, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MREAD, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MWRITE, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_SBUSFLAG, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_SREAD, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_SWRITE, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(GPMC_A11, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD28, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD29, 0, 
+ NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD32, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD33, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD34, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD35, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), + _OMAP3_MUXENTRY(SAD2D_MCAD36, 0, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL), { .reg_offset = OMAP_MUX_TERMINATOR }, }; From 78737ae1b0f0b425e8eb72a9c84125fa8cac8e8e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Feb 2010 13:03:42 -0800 Subject: [PATCH 206/640] omap: Fix arch/arm/mach-omap2/mux.c: Off by one error David Binderman ran the sourceforge tool cppcheck over the source code of the new Linux kernel 2.6.33-rc6: [./arm/mach-omap2/mux.c:492]: (error) Buffer access out-of-bounds 13 characters + 1 digit + 1 zero byte is more than 14 characters. Also add a comment on mode0 name length in case new omaps start using longer names. 
Reported-by: David Binderman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mux.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 19001dd8dd7e..5fedc50c58e4 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -486,7 +486,7 @@ int __init omap_mux_init_signal(char *muxname, int val) static inline void omap_mux_decode(struct seq_file *s, u16 val) { char *flags[OMAP_MUX_MAX_NR_FLAGS]; - char mode[14]; + char mode[sizeof("OMAP_MUX_MODE") + 1]; int i = -1; sprintf(mode, "OMAP_MUX_MODE%d", val & 0x7); @@ -553,6 +553,7 @@ static int omap_mux_dbg_board_show(struct seq_file *s, void *unused) if (!m0_name) continue; + /* REVISIT: Needs to be updated if mode0 names get longer */ for (i = 0; i < OMAP_MUX_DEFNAME_LEN; i++) { if (m0_name[i] == '\0') { m0_def[i] = m0_name[i]; From 0825cc8a6ffa54c87ad7ad914a16d6c035627935 Mon Sep 17 00:00:00 2001 From: Marek Skuczynski Date: Sun, 31 Jan 2010 10:00:54 +0000 Subject: [PATCH 207/640] omap: Fix access to already released memory in clk_debugfs_register_one() I have found an access to already released memory in clk_debugfs_register_one() function. 
Signed-off-by: Marek Skuczynski Acked-by: Paul Walmsley Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index d9f8c844c385..4becbdd1935c 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -391,7 +391,7 @@ static struct dentry *clk_debugfs_root; static int clk_debugfs_register_one(struct clk *c) { int err; - struct dentry *d, *child; + struct dentry *d, *child, *child_tmp; struct clk *pa = c->parent; char s[255]; char *p = s; @@ -423,7 +423,7 @@ static int clk_debugfs_register_one(struct clk *c) err_out: d = c->dent; - list_for_each_entry(child, &d->d_subdirs, d_u.d_child) + list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child) debugfs_remove(child); debugfs_remove(c->dent); return err; From 301fe8eeee02c570c5bd30537aff9456f7f7955c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Feb 2010 12:34:31 -0800 Subject: [PATCH 208/640] omap: Disable serial port autoidle by default Currently the omap serial clocks are autoidled after 5 seconds. However, this causes lost characters on the serial ports. As this is considered non-standard behaviour for Linux, disable the timeout. Note that this will also cause blocking of any deeper omap sleep states. To enable the autoidling of the serial ports, do something like this for each serial port: # echo 5 > /sys/devices/platform/serial8250.0/sleep_timeout # echo 5 > /sys/devices/platform/serial8250.1/sleep_timeout ... 
Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/serial.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 8c964bec8159..e10a02df6e1d 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -36,7 +36,13 @@ #define UART_OMAP_NO_EMPTY_FIFO_READ_IP_REV 0x52 #define UART_OMAP_WER 0x17 /* Wake-up enable register */ -#define DEFAULT_TIMEOUT (5 * HZ) +/* + * NOTE: By default the serial timeout is disabled as it causes lost characters + * over the serial ports. This means that the UART clocks will stay on until + * disabled via sysfs. This also causes that any deeper omap sleep states are + * blocked. + */ +#define DEFAULT_TIMEOUT 0 struct omap_uart_state { int num; @@ -422,7 +428,8 @@ static void omap_uart_idle_init(struct omap_uart_state *uart) uart->timeout = DEFAULT_TIMEOUT; setup_timer(&uart->timer, omap_uart_idle_timer, (unsigned long) uart); - mod_timer(&uart->timer, jiffies + uart->timeout); + if (uart->timeout) + mod_timer(&uart->timer, jiffies + uart->timeout); omap_uart_smart_idle_enable(uart, 0); if (cpu_is_omap34xx()) { From b9c3032277f756e73f6c673419dc414155e04e46 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Feb 2010 18:08:52 +0100 Subject: [PATCH 209/640] hrtimer, softirq: Fix hrtimer->softirq trampoline hrtimers callbacks are always done from hardirq context, either the jiffy tick interrupt or the hrtimer device interrupt. [ there is currently one exception that can still call a hrtimer callback from softirq, but even in that case this will still work correctly. ] Reported-by: Wei Yongjun Signed-off-by: Peter Zijlstra Cc: Yury Polyanskiy Tested-by: Wei Yongjun Acked-by: David S. 
Miller LKML-Reference: <1265120401.24455.306.camel@laptop> Signed-off-by: Thomas Gleixner --- kernel/softirq.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index a09502e2ef75..7c1a67ef0274 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill); */ /* - * The trampoline is called when the hrtimer expires. If this is - * called from the hrtimer interrupt then we schedule the tasklet as - * the timer callback function expects to run in softirq context. If - * it's called in softirq context anyway (i.e. high resolution timers - * disabled) then the hrtimer callback is called right away. + * The trampoline is called when the hrtimer expires. It schedules a tasklet + * to run __tasklet_hrtimer_trampoline() which in turn will call the intended + * hrtimer callback, but from softirq context. */ static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) { struct tasklet_hrtimer *ttimer = container_of(timer, struct tasklet_hrtimer, timer); - if (hrtimer_is_hres_active(timer)) { - tasklet_hi_schedule(&ttimer->tasklet); - return HRTIMER_NORESTART; - } - return ttimer->function(timer); + tasklet_hi_schedule(&ttimer->tasklet); + return HRTIMER_NORESTART; } /* From 1038a00b458997661bcd0e780a24dc280a8841fc Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 11:42:26 -0800 Subject: [PATCH 210/640] Bluetooth: Fallback eSCO to SCO on error 0x1a (Unsupported Remote Feature) General Motors carkits that use LGE BT chipsets return this error code when an eSCO is attempted, despite advertising eSCO support. 
2009-08-13 14:41:39.755518 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-08-13 14:41:39.757563 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-08-13 14:41:39.789484 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1a handle 257 bdaddr 00:1E:B2:23:5E:B3 type eSCO Error: Unsupported Remote Feature / Unsupported LMP Feature Signed-off-by: Jaikumar Ganesh Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 28517bad796c..592da5c909c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1699,6 +1699,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu break; case 0x1c: /* SCO interval rejected */ + case 0x1a: /* Unsupported Remote Feature */ case 0x1f: /* Unspecified error */ if (conn->out && conn->attempt < 2) { conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | From b6c3f5be7c6ac3375f44de4545c1ffe216b34022 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 2 Feb 2010 10:08:19 -0600 Subject: [PATCH 211/640] b43: Fix throughput regression Commit c7ab5ef9bcd281135c21b4732c9be779585181be entitled "b43: implement short slot and basic rate handling" reduced the transmit throughput for my BCM4311 device from 18 Mb/s to 0.7 Mb/s. The basic rate handling portion is OK, the problem is in the short slot handling. Prior to this change, the short slot enable/disable routines were never called. Experimentation showed that the critical part was changing the value at offset 0x0010 in the shared memory. This is supposed to contain the 802.11 Slot Time in usec, but if it is changed from its initial value of zero, performance is destroyed. 
On the other hand, changing the value in the MMIO register corresponding to the Interframe Slot Time increased performance from 18 to 22 Mb/s. A BCM4306/3 also shows dramatic improvement of the transmit rate from 5.3 to 19.0 Mb/s. Other changes in the patch include removal of the magic number for the MMIO register, and allowing the slot time to be set for any PHY operating in the 2.4 GHz band. Previously, the routine was executed only for G PHYs. Signed-off-by: Larry Finger Cc: Stable [Any stable version back through 2.6.28] Signed-off-by: John W. Linville --- drivers/net/wireless/b43/b43.h | 1 + drivers/net/wireless/b43/main.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index fe3bf9491997..c484cc253892 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -115,6 +115,7 @@ #define B43_MMIO_TSF_2 0x636 /* core rev < 3 only */ #define B43_MMIO_TSF_3 0x638 /* core rev < 3 only */ #define B43_MMIO_RNG 0x65A +#define B43_MMIO_IFSSLOT 0x684 /* Interframe slot time */ #define B43_MMIO_IFSCTL 0x688 /* Interframe space control */ #define B43_MMIO_IFSCTL_USE_EDCF 0x0004 #define B43_MMIO_POWERUP_DELAY 0x6A8 diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 19b4eae47b59..fcbf0e27d9f3 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -628,10 +628,17 @@ static void b43_upload_card_macaddress(struct b43_wldev *dev) static void b43_set_slot_time(struct b43_wldev *dev, u16 slot_time) { /* slot_time is in usec. */ - if (dev->phy.type != B43_PHYTYPE_G) + /* This test used to exit for all but a G PHY. 
*/ + if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) return; - b43_write16(dev, 0x684, 510 + slot_time); - b43_shm_write16(dev, B43_SHM_SHARED, 0x0010, slot_time); + b43_write16(dev, B43_MMIO_IFSSLOT, 510 + slot_time); + /* Shared memory location 0x0010 is the slot time and should be + * set to slot_time; however, this register is initially 0 and changing + * the value adversely affects the transmit rate for BCM4311 + * devices. Until this behavior is unterstood, delete this step + * + * b43_shm_write16(dev, B43_SHM_SHARED, 0x0010, slot_time); + */ } static void b43_short_slot_timing_enable(struct b43_wldev *dev) From 391ae22ae5726d2a8cebfa62879635c54a349642 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 3 Feb 2010 18:24:35 +0100 Subject: [PATCH 212/640] ssb: Fix CONFIG_SSB_SDIOHOST typo This fixes a CONFIG_SSB_SDIOHOST typo. Signed-off-by: Michael Buesch Reported-by: Christoph Egger Tested-By: Albert Herranz Signed-off-by: John W. Linville --- drivers/ssb/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index 5681ebed9c65..03dfd27c4bfb 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -494,8 +494,7 @@ static int ssb_devices_register(struct ssb_bus *bus) #endif break; case SSB_BUSTYPE_SDIO: -#ifdef CONFIG_SSB_SDIO - sdev->irq = bus->host_sdio->dev.irq; +#ifdef CONFIG_SSB_SDIOHOST dev->parent = &bus->host_sdio->dev; #endif break; From 485f1eff73a7b932fd3abb0dfcf804e1a1f59025 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 3 Feb 2010 15:52:18 -0800 Subject: [PATCH 213/640] Bluetooth: Fix sleeping function in RFCOMM within invalid context With the commit 9e726b17422bade75fba94e625cd35fd1353e682 the rfcomm_session_put() gets accidentally called from a timeout callback and results in this: BUG: sleeping function called from invalid context at net/core/sock.c:1897 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper Pid: 0, comm: swapper Tainted: P 2.6.32 #31 Call 
Trace: [] __might_sleep+0xf8/0xfa [] lock_sock_nested+0x29/0xc4 [] lock_sock+0xb/0xd [l2cap] [] l2cap_sock_shutdown+0x1c/0x76 [l2cap] [] ? clockevents_program_event+0x75/0x7e [] ? tick_dev_program_event+0x37/0xa5 [] l2cap_sock_release+0x27/0x67 [l2cap] [] sock_release+0x1a/0x67 [] rfcomm_session_del+0x34/0x53 [rfcomm] [] rfcomm_session_put+0x14/0x16 [rfcomm] [] rfcomm_session_timeout+0xe/0x1a [rfcomm] [] run_timer_softirq+0x1e2/0x29a [] ? rfcomm_session_timeout+0x0/0x1a [rfcomm] [] __do_softirq+0xfe/0x1c5 [] ? timer_interrupt+0x1a/0x21 [] call_softirq+0x1c/0x28 [] do_softirq+0x33/0x6b [] irq_exit+0x36/0x85 [] do_IRQ+0xa6/0xbd [] ret_from_intr+0x0/0xa [] ? acpi_idle_enter_bm+0x269/0x294 [] ? acpi_idle_enter_bm+0x25f/0x294 [] ? cpuidle_idle_call+0x97/0x107 [] ? cpu_idle+0x53/0xaa [] ? rest_init+0x7a/0x7c [] ? start_kernel+0x389/0x394 [] ? x86_64_start_reservations+0xac/0xb0 [] ? x86_64_start_kernel+0xe4/0xeb To fix this, the rfcomm_session_put() needs to be moved out of rfcomm_session_timeout() into rfcomm_process_sessions(). In that context it is perfectly fine to sleep and disconnect the socket. 
Signed-off-by: Marcel Holtmann Tested-by: David John --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fc5ee3296e22..2b506373957a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -252,7 +252,6 @@ static void rfcomm_session_timeout(unsigned long arg) BT_DBG("session %p state %ld", s, s->state); set_bit(RFCOMM_TIMED_OUT, &s->flags); - rfcomm_session_put(s); rfcomm_schedule(RFCOMM_SCHED_TIMEO); } @@ -1920,6 +1919,7 @@ static inline void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_put(s); continue; } From 6c2718da59613d76013b501bf0f8bcf9d7794b2d Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 16:18:36 -0800 Subject: [PATCH 214/640] Bluetooth: Do not call rfcomm_session_put() for RFCOMM UA on closed socket When processing a RFCOMM UA frame when the socket is closed and we were not the RFCOMM initiator would cause rfcomm_session_put() to be called twice during rfcomm_process_rx(). This would cause a kernel panic in rfcomm_session_close() then. This could be easily reproduced during disconnect with devices such as Motorola H270 that send RFCOMM UA followed quickly by L2CAP disconnect request. 
This trace for this looks like: 2009-09-21 17:22:37.788895 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 20 pf 1 ilen 0 fcs 0x7d 2009-09-21 17:22:37.906204 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:37.933090 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 20 pf 1 ilen 0 fcs 0x57 2009-09-21 17:22:38.636764 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 0 pf 1 ilen 0 fcs 0x9c 2009-09-21 17:22:38.744125 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:38.763687 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 0 pf 1 ilen 0 fcs 0xb6 2009-09-21 17:22:38.783554 > ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Disconn req: dcid 0x0040 scid 0x0041 Avoid calling rfcomm_session_put() twice by skipping this call in rfcomm_recv_ua() if the socket is closed. 
Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b506373957a..89f4a59eb82b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1150,7 +1150,11 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - rfcomm_session_put(s); + /* When socket is closed and we are not RFCOMM + * initiator rfcomm_process_rx already calls + * rfcomm_session_put() */ + if (s->sock->sk->sk_state != BT_CLOSED) + rfcomm_session_put(s); break; } } From 079b805782f94f4b278132286a8c9bc4655d1c51 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 3 Feb 2010 10:16:54 -0800 Subject: [PATCH 215/640] ocfs2: Plugs race between the dc thread and an unlock ast message This patch plugs a race between the downconvert thread and an unlock ast message. Specifically, after the downconvert worker has done its task, the dc thread needs to check whether an unlock ast made the downconvert moot. Reported-by: David Teigland Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ce8e061c9a22..e044019cb3b1 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3384,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, unsigned long flags; int blocking; int new_level; + int level; int ret = 0; int set_lvb = 0; unsigned int gen; @@ -3503,6 +3504,7 @@ recheck: * may sleep, so we save off a copy of what we're blocking as * it may change while we're not holding the spin lock. 
*/ blocking = lockres->l_blocking; + level = lockres->l_level; spin_unlock_irqrestore(&lockres->l_lock, flags); ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); @@ -3511,7 +3513,7 @@ recheck: goto leave; spin_lock_irqsave(&lockres->l_lock, flags); - if (blocking != lockres->l_blocking) { + if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { /* If this changed underneath us, then we can't drop * it just yet. */ goto recheck; From cda70ba8c05a8661f882862c4699a31d215ab151 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 1 Feb 2010 17:34:58 -0800 Subject: [PATCH 216/640] ocfs2/dlm: Remove BUG_ON in dlm recovery when freeing locks of a dead node During recovery, the dlm frees the locks for the dead node. If it finds a lock in a resource for the dead node, it expects that node to also have a ref in that lock resource. If not, it BUGs. ossbz#1175 was filed with the above BUG. Now, while it is correct that we should be expecting the ref, I see no reason why we have to BUG. After all, we are freeing up the lock and clearing the ref. This patch replaces the BUG_ON with a printk(). Hopefully, that will give us more clues next time this happens. 
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1175 Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ad712211d4ea..344bcf90cbf4 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2243,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: freed %u locks for dead node %u, " "dropping ref from lockres\n", dlm->name, res->lockname.len, res->lockname.name, freed, dead_node); - BUG_ON(!test_bit(dead_node, res->refmap)); + if(!test_bit(dead_node, res->refmap)) { + mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, " + "but ref was not set\n", dlm->name, + res->lockname.len, res->lockname.name, freed, dead_node); + __dlm_print_one_lock_resource(res); + } dlm_lockres_clear_refmap_bit(dead_node, res); } else if (test_bit(dead_node, res->refmap)) { mlog(0, "%s:%.*s: dead node %u had a ref, but had " From 180211b841b5bf13ab10d19202adab3eb7749f6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 02:53:27 +0000 Subject: [PATCH 217/640] af_key: fix netns ops ordering on module load/unload 1. After sock_register() returns, it's possible to create sockets, even if module still not initialized fully (blame generic module code for that!) 2. Consequently, pfkey_create() can be called with pfkey_net_id still not initialized which will BUG_ON in net_generic(): kernel BUG at include/net/netns/generic.h:43! 3. During netns shutdown, netns ops should be unregistered after key manager unregistered because key manager calls can be triggered from xfrm_user module: general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC pfkey_broadcast+0x111/0x210 [af_key] pfkey_send_notify+0x16a/0x300 [af_key] km_state_notify+0x41/0x70 xfrm_flush_sa+0x75/0x90 [xfrm_user] 4. 
Unregister netns ops after socket ops just in case and for symmetry. Reported by Luca Tettamanti. Signed-off-by: Alexey Dobriyan Tested-by: Luca Tettamanti Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/key/af_key.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 76fa6fef6473..539f43bc97db 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3794,9 +3794,9 @@ static struct pernet_operations pfkey_net_ops = { static void __exit ipsec_pfkey_exit(void) { - unregister_pernet_subsys(&pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); + unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } @@ -3807,21 +3807,22 @@ static int __init ipsec_pfkey_init(void) if (err != 0) goto out; - err = sock_register(&pfkey_family_ops); + err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; + err = sock_register(&pfkey_family_ops); + if (err != 0) + goto out_unregister_pernet; err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = register_pernet_subsys(&pfkey_net_ops); - if (err != 0) - goto out_xfrm_unregister_km; out: return err; -out_xfrm_unregister_km: - xfrm_unregister_km(&pfkeyv2_mgr); + out_sock_unregister: sock_unregister(PF_KEY); +out_unregister_pernet: + unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; From 974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 10:05:05 +0000 Subject: [PATCH 218/640] netlink: fix for too early rmmod Netlink code does module autoload if protocol userspace is asking for is not ready. However, module can disappear right after it was autoloaded. Example: modprobe/rmmod stress-testing and xfrm_user.ko providing NETLINK_XFRM. netlink_create() in such situation _will_ create userspace socket and _will_not_ pin module. 
Now if module was removed and we're going to call ->netlink_rcv into nothing: BUG: unable to handle kernel paging request at ffffffffa02f842a ^^^^^^^^^^^^^^^^ modules are loaded near these addresses here IP: [] 0xffffffffa02f842a PGD 161f067 PUD 1623063 PMD baa12067 PTE 0 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent CPU 1 Pid: 11515, comm: ip Not tainted 2.6.33-rc5-netns-00594-gaaa5728-dirty #6 P5E/P5E RIP: 0010:[] [] 0xffffffffa02f842a RSP: 0018:ffff8800baa3db48 EFLAGS: 00010292 RAX: ffff8800baa3dfd8 RBX: ffff8800be353640 RCX: 0000000000000000 RDX: ffffffff81959380 RSI: ffff8800bab7f130 RDI: 0000000000000001 RBP: ffff8800baa3db58 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000011 R13: ffff8800be353640 R14: ffff8800bcdec240 R15: ffff8800bd488010 FS: 00007f93749656f0(0000) GS:ffff880002300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffa02f842a CR3: 00000000ba82b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ip (pid: 11515, threadinfo ffff8800baa3c000, task ffff8800bab7eb30) Stack: ffffffff813637c0 ffff8800bd488000 ffff8800baa3dba8 ffffffff8136397d <0> 0000000000000000 ffffffff81344adc 7fffffffffffffff 0000000000000000 <0> ffff8800baa3ded8 ffff8800be353640 ffff8800bcdec240 0000000000000000 Call Trace: [] ? netlink_unicast+0x100/0x2d0 [] netlink_unicast+0x2bd/0x2d0 netlink_unicast_kernel: nlk->netlink_rcv(skb); [] ? memcpy_fromiovec+0x6c/0x90 [] netlink_sendmsg+0x1d3/0x2d0 [] sock_sendmsg+0xbb/0xf0 [] ? __lock_acquire+0x27b/0xa60 [] ? might_fault+0x73/0xd0 [] ? might_fault+0x73/0xd0 [] ? __lock_release+0x82/0x170 [] ? might_fault+0xbe/0xd0 [] ? might_fault+0x73/0xd0 [] ? verify_iovec+0x47/0xd0 [] sys_sendmsg+0x1a9/0x360 [] ? 
_raw_spin_unlock_irqrestore+0x65/0x70 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [] ? __up_read+0x84/0xb0 [] ? trace_hardirqs_on_caller+0x145/0x190 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [] 0xffffffffa02f842a RSP CR2: ffffffffa02f842a If module was quickly removed after autoloading, return -E. Return -EPROTONOSUPPORT if module was quickly removed after autoloading. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a4957bf2ca60..4c5972ba8c78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -455,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; + else + err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); + if (err < 0) + goto out; + err = __netlink_create(net, sock, cb_mutex, protocol); if (err < 0) goto out_module; From f6815077e75c5b7f55b56fc3788e328514d4e72a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 1 Feb 2010 13:41:47 +0000 Subject: [PATCH 219/640] sky2: fix transmit DMA map leakage The book keeping structure for transmit always had the flags value cleared so transmit DMA maps were never released correctly. Based on patch by Jarek Poplawski, problem observed by Michael Breuer. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/sky2.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index d760650c5c04..67249c3c9f50 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1025,11 +1025,8 @@ static void sky2_prefetch_init(struct sky2_hw *hw, u32 qaddr, static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2, u16 *slot) { struct sky2_tx_le *le = sky2->tx_le + *slot; - struct tx_ring_info *re = sky2->tx_ring + *slot; *slot = RING_NEXT(*slot, sky2->tx_ring_size); - re->flags = 0; - re->skb = NULL; le->ctrl = 0; return le; } @@ -1622,8 +1619,7 @@ static unsigned tx_le_req(const struct sk_buff *skb) return count; } -static void sky2_tx_unmap(struct pci_dev *pdev, - const struct tx_ring_info *re) +static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re) { if (re->flags & TX_MAP_SINGLE) pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr), @@ -1633,6 +1629,7 @@ static void sky2_tx_unmap(struct pci_dev *pdev, pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr), pci_unmap_len(re, maplen), PCI_DMA_TODEVICE); + re->flags = 0; } /* @@ -1839,6 +1836,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + re->skb = NULL; dev_kfree_skb_any(skb); sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size); From 8ed030dd0aa400d18c63861c2c6deb7c38f4edde Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 1 Feb 2010 02:12:19 +0000 Subject: [PATCH 220/640] dccp: fix bug in cache allocation This fixes a bug introduced in commit de4ef86cfce60d2250111f34f8a084e769f23b16 ("dccp: fix dccp rmmod when kernel configured to use slub", 17 Jan): the vsnprintf used sizeof(slab_name_fmt), which became truncated to 4 bytes, since slab_name_fmt is now a 4-byte pointer and no longer a 32-character array. 
This led to error messages such as FATAL: Error inserting dccp: No buffer space available >> kernel: [ 1456.341501] kmem_cache_create: duplicate cache cci generated due to the truncation after the 3rd character. Fixed for the moment by introducing a symbolic constant. Tested to fix the bug. Signed-off-by: Gerrit Renker Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/dccp/ccid.c | 2 +- net/dccp/ccid.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 57dfb9c8c4f2..ff16e9df1969 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -83,7 +83,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f va_list args; va_start(args, fmt); - vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args); + vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args); va_end(args); slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 269958bf7fe9..6df6f8ac9636 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -19,7 +19,9 @@ #include #include -#define CCID_MAX 255 +/* maximum value for a CCID (RFC 4340, 19.5) */ +#define CCID_MAX 255 +#define CCID_SLAB_NAME_LENGTH 32 struct tcp_info; @@ -49,8 +51,8 @@ struct ccid_operations { const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; - char ccid_hc_rx_slab_name[32]; - char ccid_hc_tx_slab_name[32]; + char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; + char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ From 1386be55e32a3c5d8ef4a2b243c530a7b664c02c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 2 Feb 2010 20:16:56 +0000 Subject: [PATCH 221/640] dccp: fix auto-loading of dccp(_probe) This fixes commit (38ff3e6bb987ec583268da8eb22628293095d43b) ("dccp_probe: Fix module load dependencies between dccp and dccp_probe", from 15 Jan). 
It fixes the construction of the first argument of try_then_request_module(), where only valid return codes from the first argument should be returned. What we do now is assign the result of register_jprobe() to ret, without the side effect of the comparison. Acked-by: Gerrit Renker Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bace1d8cbcfd..f5b3464f1242 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -161,8 +161,8 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0), - "dccp"); + try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, + "dccp"); if (ret) goto err1; From 88d1a0cf659438a66135661538ae332b23f8635a Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Thu, 4 Feb 2010 09:55:51 +0900 Subject: [PATCH 222/640] Bluetooth: Fix memory leak in Marvell BT-over-SDIO driver Signed-off-by: Yoichi Yuasa Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btmrvl_sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index f36defa37764..57d965b7f521 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -808,6 +808,7 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv, exit: sdio_release_host(card->func); + kfree(tmpbuf); return ret; } From c390216b3e868b16d8154939f4b6f8c16dbd9a9f Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 13 Nov 2009 14:16:32 -0800 Subject: [PATCH 223/640] Bluetooth: Enter active mode before establishing a SCO link. When in sniff mode with a long interval time (1.28s) it can take 4+ seconds to establish a SCO link. Fix by requesting active mode before requesting SCO connection. This improves SCO setup time to ~500ms. 
Bluetooth headsets that use a long interval time, and exhibit the long SCO connection time include Motorola H790, HX1 and H17. They have a CSR 2.1 chipset. Verified this behavior and fix with host Bluetooth chipsets: BCM4329 and TI1271. 2009-10-13 14:17:46.183722 > HCI Event: Mode Change (0x14) plen 6 status 0x00 handle 1 mode 0x02 interval 2048 Mode: Sniff 2009-10-13 14:17:53.436285 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-10-13 14:17:53.445593 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-10-13 14:17:57.788855 > HCI Event: Synchronous Connect Complete 0x2c) plen 17 status 0x00 handle 257 bdaddr 00:1A:0E:F1:A4:7F type eSCO Air mode: CVSD Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7c4224f4e7d..b10e3cdb08f8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -377,6 +377,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { + acl->power_save = 1; + hci_conn_enter_active_mode(acl); + if (lmp_esco_capable(hdev)) hci_setup_sync(sco, acl->handle); else From 41f3e0b1f524c47908e944a7e55f11c371ddddec Mon Sep 17 00:00:00 2001 From: Amit Kumar Salecha Date: Tue, 2 Feb 2010 04:16:20 +0000 Subject: [PATCH 224/640] netxen: fix tx timeout recovery for NX2031 chip For NX2031, first try to scrub interrupt before requesting firmware reset. Return statement was missing after scrubbing interrupt. Signed-off-by: Vernon Mauery Signed-off-by: Amit Kumar Salecha Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 9f9d6081959b..5209095a8739 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1941,7 +1941,7 @@ static void netxen_tx_timeout_task(struct work_struct *work) netif_wake_queue(adapter->netdev); clear_bit(__NX_RESETTING, &adapter->state); - + return; } else { clear_bit(__NX_RESETTING, &adapter->state); if (!netxen_nic_reset_context(adapter)) { From e15eec2805565c7e31dbe402215637012f1e4616 Mon Sep 17 00:00:00 2001 From: Amit Kumar Salecha Date: Tue, 2 Feb 2010 04:16:21 +0000 Subject: [PATCH 225/640] netxen: protect resource cleanup by rtnl lock o context resources can be in use, while resource cleanup is in progress, during fw recover. o Null pointer exception can occur in send_cmd_desc, if fw recovery module frees tx ring without rtnl lock. o Same applies to ethtool register dump. Signed-off-by: Amit Kumar Salecha Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 5209095a8739..24279e6e55f5 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -2240,7 +2240,9 @@ netxen_detach_work(struct work_struct *work) netxen_nic_down(adapter, netdev); + rtnl_lock(); netxen_nic_detach(adapter); + rtnl_unlock(); status = NXRD32(adapter, NETXEN_PEG_HALT_STATUS1); From e44d38e1b72a2aa7f5e7024c5da83a879355a1cc Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 3 Feb 2010 13:12:51 +0000 Subject: [PATCH 226/640] ixgbe: Fix ixgbe_tx_map error path Commit e5a43549f7a58509a91b299a51337d386697b92c (ixgbe: remove skb_dma_map/unmap calls from driver) looks to have introduced a bug in ixgbe_tx_map. 
If we get an error from a PCI DMA call, we loop backwards through count until it becomes -1 and return that. The caller of ixgbe_tx_map expects 0 on error, so return that instead. Signed-off-by: Anton Blanchard Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index b5f64ad67975..37e2af0a6145 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5179,7 +5179,7 @@ dma_error: ixgbe_unmap_and_free_tx_resource(adapter, tx_buffer_info); } - return count; + return 0; } static void ixgbe_tx_queue(struct ixgbe_adapter *adapter, From fdd3d631cddad20ad9d3e1eb7dbf26825a8a121f Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 3 Feb 2010 13:13:10 +0000 Subject: [PATCH 227/640] ixgbe: Fix return of invalid txq a developer had complained of getting lots of warnings: "eth16 selects TX queue 98, but real number of TX queues is 64" http://www.mail-archive.com/e1000-devel@lists.sourceforge.net/msg02200.html As there was no follow up on that bug, I am submitting this patch assuming that the other return points will not return invalid txq's, and also that this fixes the bug (not tested). Signed-off-by: Krishna Kumar Signed-off-by: Jesse Brandeburg Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ixgbe/ixgbe_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 37e2af0a6145..7b7c8486c0bf 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5329,8 +5329,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb) struct ixgbe_adapter *adapter = netdev_priv(dev); int txq = smp_processor_id(); - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + while (unlikely(txq >= dev->real_num_tx_queues)) + txq -= dev->real_num_tx_queues; return txq; + } #ifdef IXGBE_FCOE if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && From 454debe45c86102528c90c12eb6a99245b773bfe Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:34 +0000 Subject: [PATCH 228/640] irda: unbalanced lock_kernel in irnet_ppp Add the missing unlock_kernel in one ioctl operation. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 156020d138b5..7c22c126f0ea 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -706,7 +706,8 @@ dev_irnet_ioctl( lock_kernel(); if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - err = 0; + err = 0; + unlock_kernel(); break; /* All these ioctls can be passed both directly and from ppp_generic, From 3fdde0a1602d20c02a7d66e07ab6718ab8d79b12 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:35 +0000 Subject: [PATCH 229/640] irda: add missing BKL in irnet_ppp ioctl One ioctl has been forgotten when the BKL was push down into irnet_ppp ioctl function. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. 
Miller --- net/irda/irnet/irnet_ppp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c22c126f0ea..6b3602de359a 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -698,9 +698,11 @@ dev_irnet_ioctl( /* Query PPP channel and unit number */ case PPPIOCGCHAN: + lock_kernel(); if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) err = 0; + unlock_kernel(); break; case PPPIOCGUNIT: lock_kernel(); From bc496ed00ab1411d3efaf295b72e0c9eb343e1a3 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 1 Feb 2010 13:11:38 -0500 Subject: [PATCH 230/640] libata-scsi passthru: fix bug which truncated LBA48 return values Fix assignment which overwrote SAT ATA PASS-THROUGH command EXTEND bit setting (ATA_TFLAG_LBA48) Signed-off-by: Douglas Gilbert Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f4ea5a8c325b..d096fbcbc771 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) * write indication (used for PIO/DMA setup), result TF is * copied back and we don't whine too much about its failure. */ - tf->flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; if (scmd->sc_data_direction == DMA_TO_DEVICE) tf->flags |= ATA_TFLAG_WRITE; From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jan 2010 13:30:11 +0100 Subject: [PATCH 231/640] libata: fix ata_id_logical_per_physical_sectors The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not a plain multiple, but the log of it, so fix the helper to give the correct answer.
Without this we'll get an incorrect minimal I/O size in the block limits VPD page for 4k sector drives. Also change the return value of ata_id_logical_per_physical_sectors to u16 for the unlikely case of very large logical sectors. Signed-off-by: Christoph Hellwig Signed-off-by: Jeff Garzik --- include/linux/ata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 38a6948ce0c2..20f31567ccee 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id) return id[ATA_ID_SECTOR_SIZE] & (1 << 13); } -static inline u8 ata_id_logical_per_physical_sectors(const u16 *id) +static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) { - return id[ATA_ID_SECTOR_SIZE] & 0xf; + return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); } static inline int ata_id_has_lba48(const u16 *id) From cedc9bf906dae044443d403371c887affdb44168 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 Jan 2010 16:04:15 +0900 Subject: [PATCH 232/640] ahci: add Acer G725 to broken suspend list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acer G725 shares the same suspend problem with the HP laptops which lose ATA devices on resume. New firmware which fixes the problem is already available. Add G725 with old firmwares to the broken suspend list. This problem has been reported in bko#15104. 
http://bugzilla.kernel.org/show_bug.cgi?id=15104 Signed-off-by: Tejun Heo Reported-by: Jani-Matti Hätinen Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index b8bea100a160..b34390347c16 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -2868,6 +2868,21 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) }, .driver_data = "F.23", /* cutoff BIOS version */ }, + /* + * Acer eMachines G725 has the same problem. BIOS + * V1.03 is known to be broken. V3.04 is known to + * work. Inbetween, there are V1.06, V2.06 and V3.03 + * that we don't have much idea about. For now, + * blacklist anything older than V3.04. + */ + { + .ident = "G725", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "eMachines"), + DMI_MATCH(DMI_PRODUCT_NAME, "eMachines G725"), + }, + .driver_data = "V3.04", /* cutoff BIOS version */ + }, { } /* terminate list */ }; const struct dmi_system_id *dmi = dmi_first_match(sysids); From 2d68b7fe55d9e19a8a868224ed0dfd6526568521 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 4 Feb 2010 01:04:50 -0500 Subject: [PATCH 233/640] [libata] Call flush_dcache_page after PIO data transfers in libata-sff.c flush_dcache_page() must be called after (!ATA_TFLAG_WRITE) the data copying to avoid D-cache aliasing with user space or I-D cache coherency issues (when reading data from an ATA device using PIO, the kernel dirties the D-cache but there is no flush_dcache_page() required on Harvard architectures). 
Signed-off-by: Catalin Marinas Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 741065c9da67..730ef3c384ca 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -893,6 +893,9 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) do_write); } + if (!do_write) + flush_dcache_page(page); + qc->curbytes += qc->sect_size; qc->cursg_ofs += qc->sect_size; From 06df6dafb5d9e3cfa3588c6ce79328b91582b6af Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 4 Feb 2010 14:43:38 +0900 Subject: [PATCH 234/640] x86/agp: Fix amd64-agp module initialization regression This fixes the regression introduced by commit 42590a75019a50012f25a962246498dead428433 ("x86/agp: Fix agp_amd64_init and agp_amd64_cleanup"). The commit 61684ceaad4f65d1a9832c722f7bd5e7fc714de9 fixed the above regression but it's not enough. When amd64-agp is built as a module, AGP isn't initialized, iommu is initialized, all the aperture is owned by the iommu. Reported-by: Marin Mitov Signed-off-by: FUJITA Tomonori Tested-by: Marin Mitov LKML-Reference: <20100204090802S.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- drivers/char/agp/amd64-agp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 34cf04e21795..fd50ead59c79 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -767,16 +767,19 @@ int __init agp_amd64_init(void) static int __init agp_amd64_mod_init(void) { +#ifndef MODULE if (gart_iommu_aperture) return agp_bridges_found ? 
0 : -ENODEV; - +#endif return agp_amd64_init(); } static void __exit agp_amd64_cleanup(void) { +#ifndef MODULE if (gart_iommu_aperture) return; +#endif if (aperture_resource) release_resource(aperture_resource); pci_unregister_driver(&agp_amd64_pci_driver); From 9de89fe7c577847877ae00ea1aa6315559b10243 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:00 -0200 Subject: [PATCH 235/640] perf symbols: Remove perf_session usage in symbols layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I noticed while writing the first test in 'perf regtest' that to just test the symbol handling routines one needs to create a perf session, that is a layer centered on a perf.data file, events, etc, so I untied these layers. This reduces the complexity for the users as the number of parameters to most of the symbols and session APIs now was reduced while not adding more state to all the map instances by only having data that is needed to split the kernel (kallsyms and ELF symtab sections) maps and do vmlinux relocation on the main kernel map. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-kmem.c | 2 +- tools/perf/builtin-probe.c | 5 +- tools/perf/util/event.c | 6 +- tools/perf/util/map.c | 20 +++--- tools/perf/util/map.h | 22 +++++-- tools/perf/util/session.c | 35 +++++++---- tools/perf/util/session.h | 22 ++++--- tools/perf/util/symbol.c | 122 +++++++++++++++++-------------------- tools/perf/util/symbol.h | 19 +++--- tools/perf/util/thread.c | 3 +- tools/perf/util/thread.h | 14 +++-- 11 files changed, 149 insertions(+), 121 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 5d5dc6b09617..924a9518931a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -369,7 +369,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = map_groups__find_function(&session->kmaps, session, addr, NULL); + sym = map_groups__find_function(&session->kmaps, addr, NULL); } else addr = data->ptr; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 34f2acb1cc88..4fa73eca1d82 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -122,8 +122,7 @@ static int opt_del_probe_event(const struct option *opt __used, static void evaluate_probe_point(struct probe_point *pp) { struct symbol *sym; - sym = map__find_symbol_by_name(session.kmap, pp->function, - session.psession, NULL); + sym = map__find_symbol_by_name(session.kmap, pp->function, NULL); if (!sym) die("Kernel symbol \'%s\' not found - probe not added.", pp->function); @@ -132,7 +131,7 @@ static void evaluate_probe_point(struct probe_point *pp) #ifndef NO_LIBDWARF static int open_vmlinux(void) { - if (map__load(session.kmap, session.psession, NULL) < 0) { + if (map__load(session.kmap, NULL) < 0) { 
pr_debug("Failed to load kernel map.\n"); return -EINVAL; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bbaee61c1683..c3831f633dec 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -374,9 +374,7 @@ int event__process_mmap(event_t *self, struct perf_session *session) goto out_problem; kernel->kernel = 1; - if (__map_groups__create_kernel_maps(&session->kmaps, - session->vmlinux_maps, - kernel) < 0) + if (__perf_session__create_kernel_maps(session, kernel) < 0) goto out_problem; session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start; @@ -476,7 +474,7 @@ void thread__find_addr_location(struct thread *self, { thread__find_addr_map(self, session, cpumode, type, addr, al); if (al->map != NULL) - al->sym = map__find_symbol(al->map, session, al->addr, filter); + al->sym = map__find_symbol(al->map, al->addr, filter); else al->sym = NULL; } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index c4d55a0da2ea..36ff0bf0315d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -104,8 +104,7 @@ void map__fixup_end(struct map *self) #define DSO__DELETED "(deleted)" -int map__load(struct map *self, struct perf_session *session, - symbol_filter_t filter) +int map__load(struct map *self, symbol_filter_t filter) { const char *name = self->dso->long_name; int nr; @@ -113,7 +112,7 @@ int map__load(struct map *self, struct perf_session *session, if (dso__loaded(self->dso, self->type)) return 0; - nr = dso__load(self->dso, self, session, filter); + nr = dso__load(self->dso, self, filter); if (nr < 0) { if (self->dso->has_build_id) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -144,24 +143,29 @@ int map__load(struct map *self, struct perf_session *session, return -1; } + /* + * Only applies to the kernel, as its symtabs aren't relative like the + * module ones. 
+ */ + if (self->dso->kernel) + map__reloc_vmlinux(self); return 0; } -struct symbol *map__find_symbol(struct map *self, struct perf_session *session, - u64 addr, symbol_filter_t filter) +struct symbol *map__find_symbol(struct map *self, u64 addr, + symbol_filter_t filter) { - if (map__load(self, session, filter) < 0) + if (map__load(self, filter) < 0) return NULL; return dso__find_symbol(self->dso, self->type, addr); } struct symbol *map__find_symbol_by_name(struct map *self, const char *name, - struct perf_session *session, symbol_filter_t filter) { - if (map__load(self, session, filter) < 0) + if (map__load(self, filter) < 0) return NULL; if (!dso__sorted_by_name(self->dso, self->type)) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 72f0b6ab5ea5..de048399d776 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -14,6 +14,8 @@ enum map_type { #define MAP__NR_TYPES (MAP__VARIABLE + 1) struct dso; +struct ref_reloc_sym; +struct map_groups; struct map { union { @@ -29,6 +31,16 @@ struct map { struct dso *dso; }; +struct kmap { + struct ref_reloc_sym *ref_reloc_sym; + struct map_groups *kmaps; +}; + +static inline struct kmap *map__kmap(struct map *self) +{ + return (struct kmap *)(self + 1); +} + static inline u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; @@ -58,16 +70,14 @@ struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); -struct perf_session; - -int map__load(struct map *self, struct perf_session *session, - symbol_filter_t filter); -struct symbol *map__find_symbol(struct map *self, struct perf_session *session, +int map__load(struct map *self, symbol_filter_t filter); +struct symbol *map__find_symbol(struct map *self, u64 addr, symbol_filter_t filter); struct symbol *map__find_symbol_by_name(struct map *self, const char *name, - struct perf_session *session, symbol_filter_t filter); void map__fixup_start(struct 
map *self); void map__fixup_end(struct map *self); +void map__reloc_vmlinux(struct map *self); + #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index cf91d099f0aa..aa8a03120bbd 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -53,6 +53,11 @@ out_close: return -1; } +static inline int perf_session__create_kernel_maps(struct perf_session *self) +{ + return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps); +} + struct perf_session *perf_session__new(const char *filename, int mode, bool force) { size_t len = filename ? strlen(filename) + 1 : 0; @@ -507,6 +512,7 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, u64 addr) { char *bracket; + enum map_type i; self->ref_reloc_sym.name = strdup(symbol_name); if (self->ref_reloc_sym.name == NULL) @@ -517,6 +523,12 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, *bracket = '\0'; self->ref_reloc_sym.addr = addr; + + for (i = 0; i < MAP__NR_TYPES; ++i) { + struct kmap *kmap = map__kmap(self->vmlinux_maps[i]); + kmap->ref_reloc_sym = &self->ref_reloc_sym; + } + return 0; } @@ -530,20 +542,21 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip) return ip - (s64)map->pgoff; } -void perf_session__reloc_vmlinux_maps(struct perf_session *self, - u64 unrelocated_addr) +void map__reloc_vmlinux(struct map *self) { - enum map_type type; - s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr; + struct kmap *kmap = map__kmap(self); + s64 reloc; + + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) + return; + + reloc = (kmap->ref_reloc_sym->unrelocated_addr - + kmap->ref_reloc_sym->addr); if (!reloc) return; - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct map *map = self->vmlinux_maps[type]; - - map->map_ip = map__reloc_map_ip; - map->unmap_ip = map__reloc_unmap_ip; - map->pgoff = reloc; - } + self->map_ip = map__reloc_map_ip; + self->unmap_ip = map__reloc_unmap_ip; + 
self->pgoff = reloc; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 36d1a80c0b6c..752d75aebade 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -3,13 +3,13 @@ #include "event.h" #include "header.h" +#include "symbol.h" #include "thread.h" #include #include "../../../include/linux/perf_event.h" struct ip_callchain; struct thread; -struct symbol; struct perf_session { struct perf_header header; @@ -24,10 +24,7 @@ struct perf_session { unsigned long unknown_events; struct rb_root hists; u64 sample_type; - struct { - const char *name; - u64 addr; - } ref_reloc_sym; + struct ref_reloc_sym ref_reloc_sym; int fd; int cwdlen; char *cwd; @@ -69,9 +66,20 @@ int perf_header__read_build_ids(struct perf_header *self, int input, int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, const char *symbol_name, u64 addr); -void perf_session__reloc_vmlinux_maps(struct perf_session *self, - u64 unrelocated_addr); void mem_bswap_64(void *src, int byte_size); +static inline int __perf_session__create_kernel_maps(struct perf_session *self, + struct dso *kernel) +{ + return __map_groups__create_kernel_maps(&self->kmaps, + self->vmlinux_maps, kernel); +} + +static inline struct map * + perf_session__new_module_map(struct perf_session *self, + u64 start, const char *filename) +{ + return map_groups__new_module(&self->kmaps, start, filename); +} #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f9049d12ead6..613874260761 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1,6 +1,5 @@ #include "util.h" #include "../perf.h" -#include "session.h" #include "sort.h" #include "string.h" #include "symbol.h" @@ -34,7 +33,7 @@ enum dso_origin { static void dsos__add(struct list_head *head, struct dso *dso); static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *self, struct map *map, - struct 
perf_session *session, symbol_filter_t filter); + symbol_filter_t filter); static int vmlinux_path__nr_entries; static char **vmlinux_path; @@ -480,8 +479,9 @@ static int dso__load_all_kallsyms(struct dso *self, const char *filename, * the original ELF section names vmlinux have. */ static int dso__split_kallsyms(struct dso *self, struct map *map, - struct perf_session *session, symbol_filter_t filter) + symbol_filter_t filter) { + struct map_groups *kmaps = map__kmap(map)->kmaps; struct map *curr_map = map; struct symbol *pos; int count = 0; @@ -503,7 +503,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, *module++ = '\0'; if (strcmp(curr_map->dso->short_name, module)) { - curr_map = map_groups__find_by_name(&session->kmaps, map->type, module); + curr_map = map_groups__find_by_name(kmaps, map->type, module); if (curr_map == NULL) { pr_debug("/proc/{kallsyms,modules} " "inconsistency while looking " @@ -538,7 +538,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, } curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; - map_groups__insert(&session->kmaps, curr_map); + map_groups__insert(kmaps, curr_map); ++kernel_range; } @@ -557,9 +557,8 @@ discard_symbol: rb_erase(&pos->rb_node, root); return count; } - -static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map, - struct perf_session *session, symbol_filter_t filter) +int dso__load_kallsyms(struct dso *self, const char *filename, + struct map *map, symbol_filter_t filter) { if (dso__load_all_kallsyms(self, filename, map) < 0) return -1; @@ -567,7 +566,7 @@ static int dso__load_kallsyms(struct dso *self, const char *filename, struct map symbols__fixup_end(&self->symbols[map->type]); self->origin = DSO__ORIG_KERNEL; - return dso__split_kallsyms(self, map, session, filter); + return dso__split_kallsyms(self, map, filter); } static int dso__load_perf_map(struct dso *self, struct map *map, @@ -893,10 +892,10 @@ static bool 
elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type } } -static int dso__load_sym(struct dso *self, struct map *map, - struct perf_session *session, const char *name, int fd, - symbol_filter_t filter, int kernel, int kmodule) +static int dso__load_sym(struct dso *self, struct map *map, const char *name, + int fd, symbol_filter_t filter, int kmodule) { + struct kmap *kmap = self->kernel ? map__kmap(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = self; size_t dso_name_len = strlen(self->short_name); @@ -953,7 +952,7 @@ static int dso__load_sym(struct dso *self, struct map *map, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - if (!kernel) { + if (!self->kernel) { self->adjust_symbols = (ehdr.e_type == ET_EXEC || elf_section_by_name(elf, &ehdr, &shdr, ".gnu.prelink_undo", @@ -967,9 +966,9 @@ static int dso__load_sym(struct dso *self, struct map *map, int is_label = elf_sym__is_label(&sym); const char *section_name; - if (kernel && session->ref_reloc_sym.name != NULL && - strcmp(elf_name, session->ref_reloc_sym.name) == 0) - perf_session__reloc_vmlinux_maps(session, sym.st_value); + if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && + strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) + kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; if (!is_label && !elf_sym__is_a(&sym, map->type)) continue; @@ -985,7 +984,7 @@ static int dso__load_sym(struct dso *self, struct map *map, section_name = elf_sec__name(&shdr, secstrs); - if (kernel || kmodule) { + if (self->kernel || kmodule) { char dso_name[PATH_MAX]; if (strcmp(section_name, @@ -1001,7 +1000,7 @@ static int dso__load_sym(struct dso *self, struct map *map, snprintf(dso_name, sizeof(dso_name), "%s%s", self->short_name, section_name); - curr_map = map_groups__find_by_name(&session->kmaps, map->type, dso_name); + curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); if (curr_map == NULL) { u64 start = sym.st_value; @@ -1020,7 
+1019,7 @@ static int dso__load_sym(struct dso *self, struct map *map, curr_map->map_ip = identity__map_ip; curr_map->unmap_ip = identity__map_ip; curr_dso->origin = DSO__ORIG_KERNEL; - map_groups__insert(&session->kmaps, curr_map); + map_groups__insert(kmap->kmaps, curr_map); dsos__add(&dsos__kernel, curr_dso); } else curr_dso = curr_map->dso; @@ -1236,8 +1235,7 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, struct map *map, struct perf_session *session, - symbol_filter_t filter) +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; char *name; @@ -1249,7 +1247,7 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session, dso__set_loaded(self, map->type); if (self->kernel) - return dso__load_kernel_sym(self, map, session, filter); + return dso__load_kernel_sym(self, map, filter); name = malloc(size); if (!name) @@ -1320,7 +1318,7 @@ open_file: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0); + ret = dso__load_sym(self, map, name, fd, filter, 0); close(fd); /* @@ -1376,7 +1374,7 @@ static int dso__kernel_module_get_build_id(struct dso *self) return 0; } -static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname) +static int map_groups__set_modules_path_dir(struct map_groups *self, char *dirname) { struct dirent *dent; DIR *dir = opendir(dirname); @@ -1396,7 +1394,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name); - if (perf_session__set_modules_path_dir(self, path) < 0) + if (map_groups__set_modules_path_dir(self, path) < 0) goto failure; } else { char *dot = strrchr(dent->d_name, '.'), @@ -1410,7 +1408,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d (int)(dot - dent->d_name), dent->d_name); strxfrchar(dso_name, 
'-', '_'); - map = map_groups__find_by_name(&self->kmaps, MAP__FUNCTION, dso_name); + map = map_groups__find_by_name(self, MAP__FUNCTION, dso_name); if (map == NULL) continue; @@ -1431,7 +1429,7 @@ failure: return -1; } -static int perf_session__set_modules_path(struct perf_session *self) +static int map_groups__set_modules_path(struct map_groups *self) { struct utsname uts; char modules_path[PATH_MAX]; @@ -1442,7 +1440,7 @@ static int perf_session__set_modules_path(struct perf_session *self) snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", uts.release); - return perf_session__set_modules_path_dir(self, modules_path); + return map_groups__set_modules_path_dir(self, modules_path); } /* @@ -1452,8 +1450,8 @@ static int perf_session__set_modules_path(struct perf_session *self) */ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) { - struct map *self = malloc(sizeof(*self)); - + struct map *self = zalloc(sizeof(*self) + + (dso->kernel ? sizeof(struct kmap) : 0)); if (self != NULL) { /* * ->end will be filled after we load all the symbols @@ -1464,8 +1462,8 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return self; } -struct map *perf_session__new_module_map(struct perf_session *self, u64 start, - const char *filename) +struct map *map_groups__new_module(struct map_groups *self, u64 start, + const char *filename) { struct map *map; struct dso *dso = __dsos__findnew(&dsos__kernel, filename); @@ -1478,11 +1476,11 @@ struct map *perf_session__new_module_map(struct perf_session *self, u64 start, return NULL; dso->origin = DSO__ORIG_KMODULE; - map_groups__insert(&self->kmaps, map); + map_groups__insert(self, map); return map; } -static int perf_session__create_module_maps(struct perf_session *self) +static int map_groups__create_modules(struct map_groups *self) { char *line = NULL; size_t n; @@ -1520,7 +1518,7 @@ static int perf_session__create_module_maps(struct perf_session *self) *sep = 
'\0'; snprintf(name, sizeof(name), "[%s]", line); - map = perf_session__new_module_map(self, start, name); + map = map_groups__new_module(self, start, name); if (map == NULL) goto out_delete_line; dso__kernel_module_get_build_id(map->dso); @@ -1529,7 +1527,7 @@ static int perf_session__create_module_maps(struct perf_session *self) free(line); fclose(file); - return perf_session__set_modules_path(self); + return map_groups__set_modules_path(self); out_delete_line: free(line); @@ -1538,7 +1536,6 @@ out_failure: } static int dso__load_vmlinux(struct dso *self, struct map *map, - struct perf_session *session, const char *vmlinux, symbol_filter_t filter) { int err = -1, fd; @@ -1572,14 +1569,14 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return -1; dso__set_loaded(self, map->type); - err = dso__load_sym(self, map, session, vmlinux, fd, filter, 1, 0); + err = dso__load_sym(self, map, vmlinux, fd, filter, 0); close(fd); return err; } int dso__load_vmlinux_path(struct dso *self, struct map *map, - struct perf_session *session, symbol_filter_t filter) + symbol_filter_t filter) { int i, err = 0; @@ -1587,8 +1584,7 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map, vmlinux_path__nr_entries); for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(self, map, session, vmlinux_path[i], - filter); + err = dso__load_vmlinux(self, map, vmlinux_path[i], filter); if (err > 0) { pr_debug("Using %s for symbols\n", vmlinux_path[i]); dso__set_long_name(self, strdup(vmlinux_path[i])); @@ -1600,7 +1596,7 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map, } static int dso__load_kernel_sym(struct dso *self, struct map *map, - struct perf_session *session, symbol_filter_t filter) + symbol_filter_t filter) { int err; const char *kallsyms_filename = NULL; @@ -1621,13 +1617,13 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, * match. 
*/ if (symbol_conf.vmlinux_name != NULL) { - err = dso__load_vmlinux(self, map, session, + err = dso__load_vmlinux(self, map, symbol_conf.vmlinux_name, filter); goto out_try_fixup; } if (vmlinux_path != NULL) { - err = dso__load_vmlinux_path(self, map, session, filter); + err = dso__load_vmlinux_path(self, map, filter); if (err > 0) goto out_fixup; } @@ -1675,7 +1671,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, } do_kallsyms: - err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter); + err = dso__load_kallsyms(self, kallsyms_filename, map, filter); free(kallsyms_allocated_filename); out_try_fixup: @@ -1812,30 +1808,23 @@ int __map_groups__create_kernel_maps(struct map_groups *self, enum map_type type; for (type = 0; type < MAP__NR_TYPES; ++type) { + struct kmap *kmap; + vmlinux_maps[type] = map__new2(0, kernel, type); if (vmlinux_maps[type] == NULL) return -1; vmlinux_maps[type]->map_ip = vmlinux_maps[type]->unmap_ip = identity__map_ip; + + kmap = map__kmap(vmlinux_maps[type]); + kmap->kmaps = self; map_groups__insert(self, vmlinux_maps[type]); } return 0; } -static int map_groups__create_kernel_maps(struct map_groups *self, - struct map *vmlinux_maps[MAP__NR_TYPES], - const char *vmlinux) -{ - struct dso *kernel = dsos__create_kernel(vmlinux); - - if (kernel == NULL) - return -1; - - return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel); -} - static void vmlinux_path__exit(void) { while (--vmlinux_path__nr_entries >= 0) { @@ -1941,19 +1930,22 @@ out_free_comm_list: return -1; } -int perf_session__create_kernel_maps(struct perf_session *self) +int map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES]) { - if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps, - symbol_conf.vmlinux_name) < 0) + struct dso *kernel = dsos__create_kernel(symbol_conf.vmlinux_name); + + if (kernel == NULL) return -1; - if (symbol_conf.use_modules && - 
perf_session__create_module_maps(self) < 0) - pr_debug("Failed to load list of modules for session %s, " - "continuing...\n", self->filename); + if (__map_groups__create_kernel_maps(self, vmlinux_maps, kernel) < 0) + return -1; + + if (symbol_conf.use_modules && map_groups__create_modules(self) < 0) + return -1; /* * Now that we have all the maps created, just set the ->end of them: */ - map_groups__fixup_end(&self->kmaps); + map_groups__fixup_end(self); return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 124302778c09..e6a59e5c2bea 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -80,6 +80,12 @@ static inline void *symbol__priv(struct symbol *self) return ((void *)self) - symbol_conf.priv_size; } +struct ref_reloc_sym { + const char *name; + u64 addr; + u64 unrelocated_addr; +}; + struct addr_location { struct thread *thread; struct map *map; @@ -126,12 +132,11 @@ static inline struct dso *dsos__findnew(const char *name) return __dsos__findnew(&dsos__user, name); } -struct perf_session; - -int dso__load(struct dso *self, struct map *map, struct perf_session *session, - symbol_filter_t filter); +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); int dso__load_vmlinux_path(struct dso *self, struct map *map, - struct perf_session *session, symbol_filter_t filter); + symbol_filter_t filter); +int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map, + symbol_filter_t filter); void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); @@ -156,9 +161,5 @@ int kallsyms__parse(const char *filename, void *arg, int symbol__init(void); bool symbol_type__is_a(char symbol_type, enum map_type map_type); -int perf_session__create_kernel_maps(struct perf_session *self); - -struct map *perf_session__new_module_map(struct perf_session *self, u64 start, - const char *filename); extern struct dso *vdso; #endif /* __PERF_SYMBOL */ diff --git 
a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 4a08dcf50b68..634b7f7140d5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -282,14 +282,13 @@ size_t perf_session__fprintf(struct perf_session *self, FILE *fp) } struct symbol *map_groups__find_symbol(struct map_groups *self, - struct perf_session *session, enum map_type type, u64 addr, symbol_filter_t filter) { struct map *map = map_groups__find(self, type, addr); if (map != NULL) - return map__find_symbol(map, session, map->map_ip(map, addr), filter); + return map__find_symbol(map, map->map_ip(map, addr), filter); return NULL; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e35653c1817c..56f317b8a06c 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -59,15 +59,14 @@ void thread__find_addr_location(struct thread *self, struct addr_location *al, symbol_filter_t filter); struct symbol *map_groups__find_symbol(struct map_groups *self, - struct perf_session *session, enum map_type type, u64 addr, symbol_filter_t filter); -static inline struct symbol * -map_groups__find_function(struct map_groups *self, struct perf_session *session, - u64 addr, symbol_filter_t filter) +static inline struct symbol *map_groups__find_function(struct map_groups *self, + u64 addr, + symbol_filter_t filter) { - return map_groups__find_symbol(self, session, MAP__FUNCTION, addr, filter); + return map_groups__find_symbol(self, MAP__FUNCTION, addr, filter); } struct map *map_groups__find_by_name(struct map_groups *self, @@ -76,4 +75,9 @@ struct map *map_groups__find_by_name(struct map_groups *self, int __map_groups__create_kernel_maps(struct map_groups *self, struct map *vmlinux_maps[MAP__NR_TYPES], struct dso *kernel); +int map_groups__create_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES]); + +struct map *map_groups__new_module(struct map_groups *self, u64 start, + const char *filename); #endif /* __PERF_THREAD_H */ From 
6275ce2d5f44ae4f8575c24724525cbb2a3a141b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:01 -0200 Subject: [PATCH 236/640] perf symbols: Fixup vsyscall maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While debugging a problem reported by Pekka Enberg by printing the IP and all the maps for a thread when we don't find a map for an IP I noticed that dso__load_sym needs to fixup these extra maps it creates to hold symbols in different ELF sections than the main kernel one. Now we're back showing things like: [root@doppio linux-2.6-tip]# perf report | grep vsyscall 0.02% mutt [kernel.kallsyms].vsyscall_fn [.] vread_hpet 0.01% named [kernel.kallsyms].vsyscall_fn [.] vread_hpet 0.01% NetworkManager [kernel.kallsyms].vsyscall_fn [.] vread_hpet 0.01% gconfd-2 [kernel.kallsyms].vsyscall_0 [.] vgettimeofday 0.01% hald-addon-rfki [kernel.kallsyms].vsyscall_fn [.] vread_hpet 0.00% dbus-daemon [kernel.kallsyms].vsyscall_fn [.] 
vread_hpet [root@doppio linux-2.6-tip]# Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 613874260761..051d71b33df0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1011,7 +1011,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, if (curr_dso == NULL) goto out_elf_end; curr_map = map__new2(start, curr_dso, - MAP__FUNCTION); + map->type); if (curr_map == NULL) { dso__delete(curr_dso); goto out_elf_end; @@ -1021,6 +1021,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, curr_dso->origin = DSO__ORIG_KERNEL; map_groups__insert(kmap->kmaps, curr_map); dsos__add(&dsos__kernel, curr_dso); + dso__set_loaded(curr_dso, map->type); } else curr_dso = curr_map->dso; @@ -1058,8 +1059,16 @@ new_symbol: /* * For misannotated, zeroed, ASM function sizes. */ - if (nr > 0) + if (nr > 0) { symbols__fixup_end(&self->symbols[map->type]); + if (kmap) { + /* + * We need to fixup this here too because we create new + * maps here, for things like vsyscall sections. 
+ */ + __map_groups__fixup_end(kmap->kmaps, map->type); + } + } err = nr; out_elf_end: elf_end(elf); From 8d92c02ab07602786eaa6d4e5b519395730b3fd3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:02 -0200 Subject: [PATCH 237/640] perf symbols: Ditch vdso global variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can check using strcmp, most DSOs don't start with '[' so the test is cheap enough and we had to test it there anyway since when reading perf.data files we weren't calling the routine that created this global variable and thus weren't setting it as "loaded", which was causing a bogus: Failed to open [vdso], continuing without symbols Message as the first line of 'perf report'. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/map.c | 7 ++++++- tools/perf/util/symbol.c | 26 ++++---------------------- tools/perf/util/symbol.h | 6 +++++- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 36ff0bf0315d..f6626cc3df2e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -68,8 +68,13 @@ struct map *map__new(struct mmap_event *event, enum map_type type, map__init(self, type, event->start, event->start + event->len, event->pgoff, dso); - if (self->dso == vdso || anon) + if (anon) { +set_identity: self->map_ip = self->unmap_ip = identity__map_ip; + } else if (strcmp(filename, "[vdso]") == 0) { + dso__set_loaded(dso, self->type); + goto set_identity; + } } return self; out_delete: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 051d71b33df0..e752837363ee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -53,11 +53,6 @@ bool dso__sorted_by_name(const struct dso *self, enum 
map_type type) return self->sorted_by_name & (1 << type); } -static void dso__set_loaded(struct dso *self, enum map_type type) -{ - self->loaded |= (1 << type); -} - static void dso__set_sorted_by_name(struct dso *self, enum map_type type) { self->sorted_by_name |= (1 << type); @@ -1697,7 +1692,6 @@ out_fixup: LIST_HEAD(dsos__user); LIST_HEAD(dsos__kernel); -struct dso *vdso; static void dsos__add(struct list_head *head, struct dso *dso) { @@ -1790,24 +1784,12 @@ static struct dso *dsos__create_kernel(const char *vmlinux) { struct dso *kernel = dso__new_kernel(vmlinux); - if (kernel == NULL) - return NULL; - - vdso = dso__new("[vdso]"); - if (vdso == NULL) - goto out_delete_kernel_dso; - dso__set_loaded(vdso, MAP__FUNCTION); - - dso__read_running_kernel_build_id(kernel); - - dsos__add(&dsos__kernel, kernel); - dsos__add(&dsos__user, vdso); + if (kernel != NULL) { + dso__read_running_kernel_build_id(kernel); + dsos__add(&dsos__kernel, kernel); + } return kernel; - -out_delete_kernel_dso: - dso__delete(kernel); - return NULL; } int __map_groups__create_kernel_maps(struct map_groups *self, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e6a59e5c2bea..e90568a9e467 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -121,6 +121,11 @@ void dso__delete(struct dso *self); bool dso__loaded(const struct dso *self, enum map_type type); bool dso__sorted_by_name(const struct dso *self, enum map_type type); +static inline void dso__set_loaded(struct dso *self, enum map_type type) +{ + self->loaded |= (1 << type); +} + void dso__sort_by_name(struct dso *self, enum map_type type); extern struct list_head dsos__user, dsos__kernel; @@ -161,5 +166,4 @@ int kallsyms__parse(const char *filename, void *arg, int symbol__init(void); bool symbol_type__is_a(char symbol_type, enum map_type map_type); -extern struct dso *vdso; #endif /* __PERF_SYMBOL */ From 8ad94c6052649a8e32120b464eefa0ffd8f2f04f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de 
Melo Date: Wed, 3 Feb 2010 16:52:03 -0200 Subject: [PATCH 238/640] perf probe: Don't use a perf_session instance just to resolve symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the recent modifications done to untie the session and symbol layers, 'perf probe' now can use just the symbols layer. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Cc: Frédéric Weisbecker Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-probe.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 4fa73eca1d82..ad47bd4c50ef 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -41,7 +41,6 @@ #include "util/debugfs.h" #include "util/symbol.h" #include "util/thread.h" -#include "util/session.h" #include "util/parse-options.h" #include "util/parse-events.h" /* For debugfs_path */ #include "util/probe-finder.h" @@ -59,8 +58,8 @@ static struct { int nr_probe; struct probe_point probes[MAX_PROBES]; struct strlist *dellist; - struct perf_session *psession; - struct map *kmap; + struct map_groups kmap_groups; + struct map *kmaps[MAP__NR_TYPES]; struct line_range line_range; } session; @@ -122,7 +121,8 @@ static int opt_del_probe_event(const struct option *opt __used, static void evaluate_probe_point(struct probe_point *pp) { struct symbol *sym; - sym = map__find_symbol_by_name(session.kmap, pp->function, NULL); + sym = map__find_symbol_by_name(session.kmaps[MAP__FUNCTION], + pp->function, NULL); if (!sym) die("Kernel symbol \'%s\' not found - probe not added.", pp->function); @@ -131,12 +131,13 @@ static void evaluate_probe_point(struct probe_point *pp) #ifndef NO_LIBDWARF static int open_vmlinux(void) { - if (map__load(session.kmap, NULL) < 0) { + if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) { pr_debug("Failed to 
load kernel map.\n"); return -EINVAL; } - pr_debug("Try to open %s\n", session.kmap->dso->long_name); - return open(session.kmap->dso->long_name, O_RDONLY); + pr_debug("Try to open %s\n", + session.kmaps[MAP__FUNCTION]->dso->long_name); + return open(session.kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY); } static int opt_show_lines(const struct option *opt __used, @@ -212,12 +213,11 @@ static void init_vmlinux(void) pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); if (symbol__init() < 0) die("Failed to init symbol map."); - session.psession = perf_session__new(NULL, O_WRONLY, false); - if (session.psession == NULL) - die("Failed to init perf_session."); - session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION]; - if (!session.kmap) - die("Could not find kernel map.\n"); + + map_groups__init(&session.kmap_groups); + if (map_groups__create_kernel_maps(&session.kmap_groups, + session.kmaps) < 0) + die("Failed to create kernel maps."); } int cmd_probe(int argc, const char **argv, const char *prefix __used) From 7b2567c1f57c059de29d3f2ca03aca84473865c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:04 -0200 Subject: [PATCH 239/640] perf build-id: Move the routine to find DSOs with hits to the lib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because 'perf record' will have to find the build-ids after we stop recording, so as to reduce even more the impact on the workload while we do the measurement. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 ++ tools/perf/builtin-buildid-list.c | 31 ++---------------------- tools/perf/util/build-id.c | 39 +++++++++++++++++++++++++++++++ tools/perf/util/build-id.h | 8 +++++++ 4 files changed, 51 insertions(+), 29 deletions(-) create mode 100644 tools/perf/util/build-id.c create mode 100644 tools/perf/util/build-id.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 42969303e20b..3a5fb36ccc97 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -357,6 +357,7 @@ LIB_H += util/include/asm/uaccess.h LIB_H += perf.h LIB_H += util/cache.h LIB_H += util/callchain.h +LIB_H += util/build-id.h LIB_H += util/debug.h LIB_H += util/debugfs.h LIB_H += util/event.h @@ -390,6 +391,7 @@ LIB_H += util/probe-event.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o +LIB_OBJS += util/build-id.o LIB_OBJS += util/config.o LIB_OBJS += util/ctype.o LIB_OBJS += util/debugfs.o diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 431f204bde64..d0675c02f81e 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -8,6 +8,7 @@ */ #include "builtin.h" #include "perf.h" +#include "util/build-id.h" #include "util/cache.h" #include "util/debug.h" #include "util/parse-options.h" @@ -33,34 +34,6 @@ static const struct option options[] = { OPT_END() }; -static int build_id_list__process_event(event_t *event, - struct perf_session *session) -{ - struct addr_location al; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - struct thread *thread = perf_session__findnew(session, event->ip.pid); - - if (thread == NULL) { - pr_err("problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } - - thread__find_addr_map(thread, session, 
cpumode, MAP__FUNCTION, - event->ip.ip, &al); - - if (al.map != NULL) - al.map->dso->hit = 1; - - return 0; -} - -static struct perf_event_ops build_id_list__event_ops = { - .sample = build_id_list__process_event, - .mmap = event__process_mmap, - .fork = event__process_task, -}; - static int __cmd_buildid_list(void) { int err = -1; @@ -71,7 +44,7 @@ static int __cmd_buildid_list(void) return -1; if (with_hits) - perf_session__process_events(session, &build_id_list__event_ops); + perf_session__process_events(session, &build_id__mark_dso_hit_ops); dsos__fprintf_buildid(stdout, with_hits); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c new file mode 100644 index 000000000000..04904b35ba81 --- /dev/null +++ b/tools/perf/util/build-id.c @@ -0,0 +1,39 @@ +/* + * build-id.c + * + * build-id support + * + * Copyright (C) 2009, 2010 Red Hat Inc. + * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo + */ +#include "build-id.h" +#include "event.h" +#include "symbol.h" +#include + +static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) +{ + struct addr_location al; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread = perf_session__findnew(session, event->ip.pid); + + if (thread == NULL) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, + event->ip.ip, &al); + + if (al.map != NULL) + al.map->dso->hit = 1; + + return 0; +} + +struct perf_event_ops build_id__mark_dso_hit_ops = { + .sample = build_id__mark_dso_hit, + .mmap = event__process_mmap, + .fork = event__process_task, +}; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h new file mode 100644 index 000000000000..1d981d63cf9a --- /dev/null +++ b/tools/perf/util/build-id.h @@ -0,0 +1,8 @@ +#ifndef PERF_BUILD_ID_H_ +#define PERF_BUILD_ID_H_ 1 + +#include "session.h" + +extern struct perf_event_ops 
build_id__mark_dso_hit_ops; + +#endif From 6122e4e4f5d0913e319ef8a4dc60a47afe4abc0a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:05 -0200 Subject: [PATCH 240/640] perf record: Stop intercepting events, use postprocessing to get build-ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to stream events as fast as possible to perf.data, and also in the future we want to have splice working, when no interception will be possible. Using build_id__mark_dso_hit_ops to create the list of DSOs that back MMAPs we also optimize disk usage in the build-id cache by only caching DSOs that had hits. Suggested-by: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Cc: Xiao Guangrong Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1265223128-11786-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 37 +++++++++++---------- tools/perf/util/header.c | 7 ++-- tools/perf/util/session.c | 64 ++++++++++++++++++++++--------------- tools/perf/util/session.h | 3 ++ tools/perf/util/symbol.c | 13 +++++--- tools/perf/util/symbol.h | 2 +- 6 files changed, 73 insertions(+), 53 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 949167efa1ed..706f00196b87 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -12,6 +12,7 @@ #include "perf.h" +#include "util/build-id.h" #include "util/util.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -65,6 +66,7 @@ static int nr_poll = 0; static int nr_cpu = 0; static int file_new = 1; +static off_t post_processing_offset; static struct perf_session *session; @@ -114,26 +116,10 @@ static void write_output(void *buf, size_t size) } } -static void write_event(event_t *buf, size_t size) -{ - /* - * Add it to the list of DSOs, so that when we finish this - * record session we can pick the available build-ids. 
- */ - if (buf->header.type == PERF_RECORD_MMAP) { - struct list_head *head = &dsos__user; - if (buf->mmap.header.misc == 1) - head = &dsos__kernel; - __dsos__findnew(head, buf->mmap.filename); - } - - write_output(buf, size); -} - static int process_synthesized_event(event_t *event, struct perf_session *self __used) { - write_event(event, event->header.size); + write_output(event, event->header.size); return 0; } @@ -185,14 +171,14 @@ static void mmap_read(struct mmap_data *md) size = md->mask + 1 - (old & md->mask); old += size; - write_event(buf, size); + write_output(buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - write_event(buf, size); + write_output(buf, size); md->prev = old; mmap_write_tail(md, old); @@ -402,10 +388,21 @@ static void open_counters(int cpu, pid_t pid) nr_cpu++; } +static int process_buildids(void) +{ + u64 size = lseek(output, 0, SEEK_CUR); + + session->fd = output; + return __perf_session__process_events(session, post_processing_offset, + size - post_processing_offset, + size, &build_id__mark_dso_hit_ops); +} + static void atexit_header(void) { session->header.data_size += bytes_written; + process_buildids(); perf_header__write(&session->header, output, true); } @@ -558,6 +555,8 @@ static int __cmd_record(int argc, const char **argv) return err; } + post_processing_offset = lseek(output, 0, SEEK_CUR); + err = event__synthesize_kernel_mmap(process_synthesized_event, session, "_text"); if (err < 0) { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ed3efd728b41..d5facd5ab1f7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -205,8 +205,11 @@ static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd) dsos__for_each_with_build_id(pos, head) { int err; struct build_id_event b; - size_t len = pos->long_name_len + 1; + size_t len; + if (!pos->hit) + continue; + len = pos->long_name_len + 1; len = ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); 
memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); @@ -371,7 +374,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) u64 sec_start; int idx = 0, err; - if (dsos__read_build_ids()) + if (dsos__read_build_ids(true)) perf_header__set_feat(self, HEADER_BUILD_ID); nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index aa8a03120bbd..74cbc64a3a3c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -385,8 +385,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -int perf_session__process_events(struct perf_session *self, - struct perf_event_ops *ops) +int __perf_session__process_events(struct perf_session *self, + u64 data_offset, u64 data_size, + u64 file_size, struct perf_event_ops *ops) { int err, mmap_prot, mmap_flags; u64 head, shift; @@ -396,32 +397,11 @@ int perf_session__process_events(struct perf_session *self, uint32_t size; char *buf; - if (perf_session__register_idle_thread(self) == NULL) - return -ENOMEM; - perf_event_ops__fill_defaults(ops); page_size = sysconf(_SC_PAGESIZE); - head = self->header.data_offset; - - if (!symbol_conf.full_paths) { - char bf[PATH_MAX]; - - if (getcwd(bf, sizeof(bf)) == NULL) { - err = -errno; -out_getcwd_err: - pr_err("failed to get the current directory\n"); - goto out_err; - } - self->cwd = strdup(bf); - if (self->cwd == NULL) { - err = -ENOMEM; - goto out_getcwd_err; - } - self->cwdlen = strlen(self->cwd); - } - + head = data_offset; shift = page_size * (head / page_size); offset += shift; head -= shift; @@ -486,10 +466,10 @@ more: head += size; - if (offset + head >= self->header.data_offset + self->header.data_size) + if (offset + head >= data_offset + data_size) goto done; - if (offset + head < self->size) + if (offset + head < file_size) goto more; done: err = 0; @@ -497,6 +477,38 @@ out_err: return err; } +int 
perf_session__process_events(struct perf_session *self, + struct perf_event_ops *ops) +{ + int err; + + if (perf_session__register_idle_thread(self) == NULL) + return -ENOMEM; + + if (!symbol_conf.full_paths) { + char bf[PATH_MAX]; + + if (getcwd(bf, sizeof(bf)) == NULL) { + err = -errno; +out_getcwd_err: + pr_err("failed to get the current directory\n"); + goto out_err; + } + self->cwd = strdup(bf); + if (self->cwd == NULL) { + err = -ENOMEM; + goto out_getcwd_err; + } + self->cwdlen = strlen(self->cwd); + } + + err = __perf_session__process_events(self, self->header.data_offset, + self->header.data_size, + self->size, ops); +out_err: + return err; +} + bool perf_session__has_traces(struct perf_session *self, const char *msg) { if (!(self->sample_type & PERF_SAMPLE_RAW)) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 752d75aebade..31950fcd8a4d 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -50,6 +50,9 @@ void perf_session__delete(struct perf_session *self); void perf_event_header__bswap(struct perf_event_header *self); +int __perf_session__process_events(struct perf_session *self, + u64 data_offset, u64 data_size, u64 size, + struct perf_event_ops *ops); int perf_session__process_events(struct perf_session *self, struct perf_event_ops *event_ops); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e752837363ee..bfb055459670 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1076,25 +1076,28 @@ static bool dso__build_id_equal(const struct dso *self, u8 *build_id) return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; } -static bool __dsos__read_build_ids(struct list_head *head) +static bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; struct dso *pos; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, node) { + if (with_hits && !pos->hit) + continue; if 
(filename__read_build_id(pos->long_name, pos->build_id, sizeof(pos->build_id)) > 0) { have_build_id = true; pos->has_build_id = true; } + } return have_build_id; } -bool dsos__read_build_ids(void) +bool dsos__read_build_ids(bool with_hits) { - bool kbuildids = __dsos__read_build_ids(&dsos__kernel), - ubuildids = __dsos__read_build_ids(&dsos__user); + bool kbuildids = __dsos__read_build_ids(&dsos__kernel, with_hits), + ubuildids = __dsos__read_build_ids(&dsos__user, with_hits); return kbuildids || ubuildids; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e90568a9e467..1b4192ee5300 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -157,7 +157,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); -bool dsos__read_build_ids(void); +bool dsos__read_build_ids(bool with_hits); int build_id__sprintf(const u8 *self, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, From 29a9f66d703cb9464e24084e09edab5683e1b6b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 3 Feb 2010 16:52:06 -0200 Subject: [PATCH 241/640] perf tools: Adjust some verbosity levels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not to pollute too much 'perf annotate' debugging sessions. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-7-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 33 ++++++++++---------------- tools/perf/util/include/linux/kernel.h | 1 + tools/perf/util/symbol.c | 9 +++---- 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 73c202ee0882..4fc3899bf83a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -97,9 +97,7 @@ static void hist_hit(struct hist_entry *he, u64 ip) sym_size = sym->end - sym->start; offset = ip - sym->start; - if (verbose) - fprintf(stderr, "%s: ip=%Lx\n", __func__, - he->map->unmap_ip(he->map, ip)); + pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip)); if (offset >= sym_size) return; @@ -108,12 +106,8 @@ static void hist_hit(struct hist_entry *he, u64 ip) h->sum++; h->ip[offset]++; - if (verbose >= 3) - printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", - (void *)(unsigned long)he->sym->start, - he->sym->name, - (void *)(unsigned long)ip, ip - he->sym->start, - h->ip[offset]); + pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start, + he->sym->name, ip, ip - he->sym->start, h->ip[offset]); } static int perf_session__add_hist_entry(struct perf_session *self, @@ -136,14 +130,14 @@ static int process_sample_event(event_t *event, struct perf_session *session) event->ip.pid, event->ip.ip); if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) { - fprintf(stderr, "problem processing %d event, skipping it.\n", - event->header.type); + pr_warning("problem processing %d event, skipping it.\n", + event->header.type); return -1; } if (!al.filtered && perf_session__add_hist_entry(session, &al, 1)) { - fprintf(stderr, "problem incrementing symbol count, " - "skipping event\n"); + pr_warning("problem 
incrementing symbol count, " + "skipping event\n"); return -1; } @@ -378,11 +372,9 @@ static void annotate_sym(struct hist_entry *he) if (!filename) return; - if (verbose) - fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n", - __func__, filename, sym->name, - map->unmap_ip(map, sym->start), - map->unmap_ip(map, sym->end)); + pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__, + filename, sym->name, map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end)); if (full_paths) d_filename = filename; @@ -542,9 +534,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) setup_pager(); if (field_sep && *field_sep == '.') { - fputs("'.' is the only non valid --field-separator argument\n", - stderr); - exit(129); + pr_err("'.' is the only non valid --field-separator argument\n"); + return -1; } return __cmd_annotate(); diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 21c0274c02fa..f2611655ab51 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -101,5 +101,6 @@ simple_strtoul(const char *nptr, char **endptr, int base) eprintf(n, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index bfb055459670..a60ba2ba1044 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -137,7 +137,7 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name) self->start = start; self->end = len ? 
start + len - 1 : start; - pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); + pr_debug4("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); memcpy(self->name, name, namelen); @@ -1024,9 +1024,10 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, } if (curr_dso->adjust_symbols) { - pr_debug2("adjusting symbol: st_value: %Lx sh_addr: " - "%Lx sh_offset: %Lx\n", (u64)sym.st_value, - (u64)shdr.sh_addr, (u64)shdr.sh_offset); + pr_debug4("%s: adjusting symbol: st_value: %#Lx " + "sh_addr: %#Lx sh_offset: %#Lx\n", __func__, + (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } /* From 7a2b6209863626cf8362e5ff4653491558f91e67 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 3 Feb 2010 16:52:07 -0200 Subject: [PATCH 242/640] perf annotate: Fix it for non-prelinked *.so The problem was we were incorrectly calculating objdump addresses for sym->start and sym->end, look: For simple ET_DYN type DSO (*.so) with one function, objdump -dS output is something like this: 000004ac : int my_strlen(const char *s) 4ac: 55 push %ebp 4ad: 89 e5 mov %esp,%ebp 4af: 83 ec 10 sub $0x10,%esp { i.e. we have relative-to-dso-mapping IPs (=RIP) there. For ET_EXEC type and probably for prelinked libs as well (sorry can't test - I don't use prelink) objdump outputs absolute IPs, e.g. 08048604 : extern "C" int zz_strlen(const char *s) 8048604: 55 push %ebp 8048605: 89 e5 mov %esp,%ebp 8048607: 83 ec 10 sub $0x10,%esp { So, if sym->start is always relative to dso mapping(*), we'll have to unmap it for ET_EXEC like cases, and leave as is for ET_DYN cases. (*) and it is - we've explicitly made it relative. Look for adjust_symbols handling in dso__load_sym() Previously we were always unmapping sym->start and for ET_DYN dsos resulting addresses were wrong, and so objdump output was empty. 
The end result was that perf annotate output for symbols from non-prelinked *.so had always 0.00% percents only, which is wrong. To fix it, let's introduce a helper for converting rip to objdump address, and also let's document what map_ip() and unmap_ip() do -- I had to study sources for several hours to understand it. Signed-off-by: Kirill Smelkov Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1265223128-11786-8-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 5 +++-- tools/perf/util/map.c | 12 ++++++++++++ tools/perf/util/map.h | 9 +++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4fc3899bf83a..28ea4e0c3658 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -189,7 +189,7 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len) line_ip = -1; } - start = he->map->unmap_ip(he->map, sym->start); + start = map__rip_2objdump(he->map, sym->start); if (line_ip != -1) { const char *path = NULL; @@ -397,7 +397,8 @@ static void annotate_sym(struct hist_entry *he) dso, dso->long_name, sym, sym->name); sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", - map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end), + map__rip_2objdump(map, sym->start), + map__rip_2objdump(map, sym->end), filename, filename); if (verbose >= 3) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f6626cc3df2e..af5805f51314 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -210,3 +210,15 @@ size_t map__fprintf(struct map *self, FILE *fp) return fprintf(fp, " %Lx-%Lx %Lx %s\n", self->start, self->end, self->pgoff, self->dso->name); } + +/* + * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. + * map->dso->adjust_symbols==1 for ET_EXEC-like cases. 
+ */ +u64 map__rip_2objdump(struct map *map, u64 rip) +{ + u64 addr = map->dso->adjust_symbols ? + map->unmap_ip(map, rip) : /* RIP -> IP */ + rip; + return addr; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index de048399d776..9cee9c788dbf 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -26,8 +26,12 @@ struct map { u64 end; enum map_type type; u64 pgoff; + + /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); + /* dso rip -> ip */ u64 (*unmap_ip)(struct map *, u64); + struct dso *dso; }; @@ -56,6 +60,11 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) return ip; } + +/* rip -> addr suitable for passing to `objdump --start-address=` */ +u64 map__rip_2objdump(struct map *map, u64 rip); + + struct symbol; struct mmap_event; From 6cff0e8dbaa4d5d822a814e5028683d7e71c3291 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 3 Feb 2010 16:52:08 -0200 Subject: [PATCH 243/640] perf top: Teach it to autolocate vmlinux By relying on logic in dso__load_kernel_sym(), we can automatically load vmlinux. The only thing which needs to be adjusted, is how --sym-annotate option is handled - now we can't rely on vmlinux being loaded until a full successful pass of dso__load_vmlinux(), but that's not the case if we'll do sym_filter_entry setup in symbol_filter(). So move this step right after event__process_sample() where we know the whole dso__load_kernel_sym() pass is done. By the way, though conceptually similar `perf top` still can't annotate userspace - see next patches with fixes.
Signed-off-by: Kirill Smelkov Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1265223128-11786-9-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-top.txt | 2 +- tools/perf/builtin-top.c | 39 ++++++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4a7d558dc309..785b9fc32a46 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -74,7 +74,7 @@ OPTIONS -s :: --sym-annotate=:: - Annotate this symbol. Requires -k option. + Annotate this symbol. -v:: --verbose:: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1fc018e048e1..83c09c8f28ed 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -94,6 +94,7 @@ struct source_line { static char *sym_filter = NULL; struct sym_entry *sym_filter_entry = NULL; +struct sym_entry *sym_filter_entry_sched = NULL; static int sym_pcnt_filter = 5; static int sym_counter = 0; static int display_weighted = -1; @@ -695,11 +696,9 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); - if (symbol_conf.vmlinux_name) { - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); - fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); - fprintf(stdout, "\t[S] stop annotation.\n"); - } + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); + fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); + fprintf(stdout, "\t[S] stop annotation.\n"); if (nr_counters > 1) fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 
1 : 0); @@ -725,14 +724,13 @@ static int key_mapped(int c) case 'Q': case 'K': case 'U': + case 'F': + case 's': + case 'S': return 1; case 'E': case 'w': return nr_counters > 1 ? 1 : 0; - case 'F': - case 's': - case 'S': - return symbol_conf.vmlinux_name ? 1 : 0; default: break; } @@ -910,8 +908,12 @@ static int symbol_filter(struct map *map, struct symbol *sym) syme = symbol__priv(sym); syme->map = map; syme->src = NULL; - if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) - sym_filter_entry = syme; + + if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { + /* schedule initial sym_filter_entry setup */ + sym_filter_entry_sched = syme; + sym_filter = NULL; + } for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { @@ -976,6 +978,13 @@ static void event__process_sample(const event_t *self, return; } + /* let's see, whether we need to install initial sym_filter_entry */ + if (sym_filter_entry_sched) { + sym_filter_entry = sym_filter_entry_sched; + sym_filter_entry_sched = NULL; + parse_source(sym_filter_entry); + } + syme = symbol__priv(al.sym); if (!syme->skip) { syme->count[counter]++; @@ -1270,7 +1279,7 @@ static const struct option options[] = { OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", - "symbol to annotate - requires -k option"), + "symbol to annotate"), OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, @@ -1306,16 +1315,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) symbol_conf.priv_size = (sizeof(struct sym_entry) + (nr_counters + 1) * sizeof(unsigned long)); - if (symbol_conf.vmlinux_name == NULL) - symbol_conf.try_vmlinux_path = true; + + symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init() < 0) return -1; if (delay_secs < 1) delay_secs = 1; - parse_source(sym_filter_entry); - /* * User specified count overrides 
default frequency. */ From 57d818895f9d294ab9080e5a662675fdee943ff1 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 4 Feb 2010 07:31:46 +0100 Subject: [PATCH 244/640] perf annotate: Fix perf top module symbol annotation Signed-off-by: Mike Galbraith Cc: Kirill Smelkov Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <1265265106.6364.5.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 83c09c8f28ed..e4156bc4566d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -204,8 +204,8 @@ static void parse_source(struct sym_entry *syme) sprintf(command, "objdump --start-address=0x%016Lx " "--stop-address=0x%016Lx -dS %s", - map->unmap_ip(map, sym->start), - map->unmap_ip(map, sym->end), path); + map__rip_2objdump(map, sym->start), + map__rip_2objdump(map, sym->end), path); file = popen(command, "r"); if (!file) From 615d0ebbc782b67296e3226c293f520f93f93515 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:04 -0500 Subject: [PATCH 245/640] kprobes: Disable booster when CONFIG_PREEMPT=y Disable kprobe booster when CONFIG_PREEMPT=y at this time, because it can't ensure that all kernel threads preempted on kprobe's boosted slot run out from the slot even using freeze_processes(). The booster on preemptive kernel will be resumed if synchronize_tasks() or something like that is introduced. 
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Ananth N Mavinakayanahalli Cc: Frederic Weisbecker Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Steven Rostedt LKML-Reference: <20100202214904.4694.24330.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/ia64/kernel/kprobes.c | 2 +- arch/x86/kernel/kprobes.c | 2 +- kernel/kprobes.c | 29 ++--------------------------- 3 files changed, 4 insertions(+), 29 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 9adac441ac9b..7026b29e277a 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) +#if !defined(CONFIG_PREEMPT) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5b8c7505b3bc..9453815138fa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -429,7 +429,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) +#if !defined(CONFIG_PREEMPT) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b7df302a0204..9907a03c29f6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -124,30 +124,6 @@ static LIST_HEAD(kprobe_insn_pages); static int kprobe_garbage_slots; static int collect_garbage_slots(void); -static int __kprobes check_safety(void) -{ - int ret = 0; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) - ret = freeze_processes(); - if (ret == 0) { 
- struct task_struct *p, *q; - do_each_thread(p, q) { - if (p != current && p->state == TASK_RUNNING && - p->pid != 0) { - printk("Check failed: %s is running\n",p->comm); - ret = -1; - goto loop_end; - } - } while_each_thread(p, q); - } -loop_end: - thaw_processes(); -#else - synchronize_sched(); -#endif - return ret; -} - /** * __get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. @@ -235,9 +211,8 @@ static int __kprobes collect_garbage_slots(void) { struct kprobe_insn_page *kip, *next; - /* Ensure no-one is preepmted on the garbages */ - if (check_safety()) - return -EAGAIN; + /* Ensure no-one is interrupted on the garbages */ + synchronize_sched(); list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { int i; From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:11 -0500 Subject: [PATCH 246/640] ftrace/alternatives: Introducing *_text_reserved functions Introducing *_text_reserved functions for checking the text address range is partially reserved or not. This patch provides checking routines for x86 smp alternatives and dynamic ftrace. Since both functions modify fixed pieces of kernel text, they should reserve and protect those from other dynamic text modifier, like kprobes. This will also be extended when introducing other subsystems which modify fixed pieces of kernel text. Dynamic text modifiers should avoid those. 
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/alternative.c | 16 ++++++++++++++++ include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 15 +++++++++++++++ 4 files changed, 42 insertions(+) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 69b74a7b877f..ac80b7d70014 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); #else static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_SMP */ /* alternative assembly primitive: */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c0ce9c..3c13284ff86d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp) mutex_unlock(&smp_alt); } +/* Return 1 if the address range is reserved for smp-alternatives */ +int alternatives_text_reserved(void *start, void *end) +{ + struct smp_alt_module *mod; + u8 **ptr; + + list_for_each_entry(mod, &smp_alt_modules, next) { + if (mod->text > end || mod->text_end < start) + 
continue; + for (ptr = mod->locks; ptr < mod->locks_end; ptr++) + if (start <= *ptr && end >= *ptr) + return 1; + } + + return 0; +} #endif #ifdef CONFIG_PARAVIRT diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b4f97d24d7f..9d127efed43c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -134,6 +134,8 @@ extern void unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); extern void unregister_ftrace_function_probe_all(char *glob); +extern int ftrace_text_reserved(void *start, void *end); + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -250,6 +252,10 @@ static inline int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } +static inline int ftrace_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e6640f80454..3d90661a5f40 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1025,6 +1025,21 @@ static void ftrace_bug(int failed, unsigned long ip) } +/* Return 1 if the address range is reserved for ftrace */ +int ftrace_text_reserved(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + if (rec->ip <= (unsigned long)end && + rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) + return 1; + } while_for_each_ftrace_rec(); + return 0; +} + + static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { From 4554dbcb85a4ed2abaa2b6fa15649b796699ec89 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:18 -0500 Subject: [PATCH 247/640] kprobes: Check probe address is reserved Check whether the address of new probe is already reserved by ftrace or alternatives (on x86) when registering new probe. If reserved, it returns an error and not register the probe. 
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214918.4694.94179.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 3 +++ kernel/kprobes.c | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 9453815138fa..5de9f4a9c3fd 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (alternatives_text_reserved(p->addr, p->addr)) + return -EINVAL; + if (!can_probe((unsigned long)p->addr)) return -EILSEQ; /* insn: must be on special executable page on x86. */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9907a03c29f6..c3340e836c37 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -703,7 +704,8 @@ int __kprobes register_kprobe(struct kprobe *p) preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || - in_kprobes_functions((unsigned long) p->addr)) { + in_kprobes_functions((unsigned long) p->addr) || + ftrace_text_reserved(p->addr, p->addr)) { preempt_enable(); return -EINVAL; } From f24bb999d2b9f2950e5cac5b69bffedf73c24ea4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:25 -0500 Subject: [PATCH 248/640] ftrace: Remove record freezing Remove record freezing. Because kprobes never puts probe on ftrace's mcount call anymore, it doesn't need ftrace to check whether kprobes on it. 
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker LKML-Reference: <20100202214925.4694.73469.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 39 --------------------------------------- 2 files changed, 40 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9d127efed43c..eb054ae95605 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -143,7 +143,6 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), - FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3d90661a5f40..1904797f4a8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records; } \ } -#ifdef CONFIG_KPROBES - -static int frozen_record_count; - -static inline void freeze_record(struct dyn_ftrace *rec) -{ - if (!(rec->flags & FTRACE_FL_FROZEN)) { - rec->flags |= FTRACE_FL_FROZEN; - frozen_record_count++; - } -} - -static inline void unfreeze_record(struct dyn_ftrace *rec) -{ - if (rec->flags & FTRACE_FL_FROZEN) { - rec->flags &= ~FTRACE_FL_FROZEN; - frozen_record_count--; - } -} - -static inline int record_frozen(struct dyn_ftrace *rec) -{ - return rec->flags & FTRACE_FL_FROZEN; -} -#else -# define freeze_record(rec) ({ 0; }) -# define unfreeze_record(rec) ({ 0; }) -# define record_frozen(rec) ({ 0; }) -#endif /* CONFIG_KPROBES */ - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1091,14 +1060,6 @@ static void ftrace_replace_code(int enable) !(rec->flags & FTRACE_FL_CONVERTED)) continue; - /* ignore updates to this record's mcount site */ - if (get_kprobe((void *)rec->ip)) { - freeze_record(rec); - continue; - } 
else { - unfreeze_record(rec); - } - failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; From 9717e6cd3db22eade7dbae0fc9235c66325a7132 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Jan 2010 13:57:44 +0100 Subject: [PATCH 249/640] perf_events: Optimize perf_event_task_tick() Pretty much all of the calls do perf_disable/perf_enable cycles, pull that out to cut back on hardware programming. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 40f8b07c5601..087025fe3ba1 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1573,12 +1573,8 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_lock(&ctx->lock); /* Rotate the first entry last of non-pinned groups */ - perf_disable(); - list_rotate_left(&ctx->flexible_groups); - perf_enable(); - raw_spin_unlock(&ctx->lock); } @@ -1593,6 +1589,8 @@ void perf_event_task_tick(struct task_struct *curr) cpuctx = &__get_cpu_var(perf_cpu_context); ctx = curr->perf_event_ctxp; + perf_disable(); + perf_ctx_adjust_freq(&cpuctx->ctx); if (ctx) perf_ctx_adjust_freq(ctx); @@ -1608,6 +1606,8 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_in(curr, EVENT_FLEXIBLE); + + perf_enable(); } static int event_enable_on_exec(struct perf_event *event, From 8c48e444191de0ff84e85d41180d7bc3e74f14ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 13:25:31 +0100 Subject: [PATCH 250/640] perf_events, x86: Implement intel core solo/duo support Implement Intel Core Solo/Duo, aka. Intel Architectural Performance Monitoring Version 1. 
Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 133 ++++++++++++++----------------- 1 file changed, 61 insertions(+), 72 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1846ead0576b..5b91992b6b25 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -227,6 +227,17 @@ static const u64 intel_perfmon_event_map[] = }; static struct event_constraint intel_core_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_core2_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ @@ -1216,7 +1227,7 @@ static void intel_pmu_disable_all(void) intel_pmu_disable_bts(); } -static void amd_pmu_disable_all(void) +static void x86_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -1226,11 +1237,11 @@ static void amd_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + rdmsrl(x86_pmu.eventsel + idx, val); if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -1278,7 +1289,7 @@ static void intel_pmu_enable_all(void) } } -static void amd_pmu_enable_all(void) +static void x86_pmu_enable_all(void) { struct cpu_hw_events 
*cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -1292,7 +1303,7 @@ static void amd_pmu_enable_all(void) val = event->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -1546,7 +1557,7 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); @@ -1598,12 +1609,6 @@ intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) x86_pmu_disable_event(hwc, idx); } -static inline void -amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - x86_pmu_disable_event(hwc, idx); -} - static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* @@ -1723,15 +1728,14 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(hwc, idx); } -static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) - x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(hwc, idx); } /* @@ -1988,50 +1992,6 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } -static int p6_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct perf_event *event; - struct hw_perf_event *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - data.raw = NULL; - - cpuc = &__get_cpu_var(cpu_hw_events); - - for (idx = 0; idx < x86_pmu.num_events; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - event = cpuc->events[idx]; - hwc = &event->hw; - - val = x86_perf_event_update(event, hwc, idx); - if (val & (1ULL << 
(x86_pmu.event_bits - 1))) - continue; - - /* - * event overflow - */ - handled = 1; - data.period = event->hw.last_period; - - if (!x86_perf_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 1, &data, regs)) - p6_pmu_disable_event(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: @@ -2098,7 +2058,7 @@ again: return 1; } -static int amd_pmu_handle_irq(struct pt_regs *regs) +static int x86_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_events *cpuc; @@ -2133,7 +2093,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - amd_pmu_disable_event(hwc, idx); + x86_pmu.disable(hwc, idx); } if (handled) @@ -2374,7 +2334,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static __initconst struct x86_pmu p6_pmu = { .name = "p6", - .handle_irq = p6_pmu_handle_irq, + .handle_irq = x86_pmu_handle_irq, .disable_all = p6_pmu_disable_all, .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_event, @@ -2401,6 +2361,29 @@ static __initconst struct x86_pmu p6_pmu = { .event_constraints = intel_p6_event_constraints }; +static __initconst struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .get_event_constraints = 
intel_get_event_constraints, + .event_constraints = intel_core_event_constraints, +}; + static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -2427,11 +2410,11 @@ static __initconst struct x86_pmu intel_pmu = { static __initconst struct x86_pmu amd_pmu = { .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_event, - .disable = amd_pmu_disable_event, + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, @@ -2498,9 +2481,10 @@ static __init int intel_pmu_init(void) version = eax.split.version_id; if (version < 2) - return -ENODEV; + x86_pmu = core_pmu; + else + x86_pmu = intel_pmu; - x86_pmu = intel_pmu; x86_pmu.version = version; x86_pmu.num_events = eax.split.num_events; x86_pmu.event_bits = eax.split.bit_width; @@ -2510,12 +2494,17 @@ static __init int intel_pmu_init(void) * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: */ - x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + if (version > 1) + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); /* * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { + case 14: /* 65 nm core solo/duo, "Yonah" */ + pr_cont("Core events, "); + break; + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ @@ -2523,7 +2512,7 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - x86_pmu.event_constraints = intel_core_event_constraints; + 
x86_pmu.event_constraints = intel_core2_event_constraints; pr_cont("Core2 events, "); break; From fce877e3a429940a986e085a41e8b57f2d922e36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 13:25:12 +0100 Subject: [PATCH 251/640] bitops: Ensure the compile time HWEIGHT is only used for such Avoid accidental misuse by failing to compile things Suggested-by: Andrew Morton Signed-off-by: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 +++++++--- include/linux/bitops.h | 33 +++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5b91992b6b25..96cfc1a4fe9f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,13 +93,16 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define __EVENT_CONSTRAINT(c, n, m, w) {\ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ - .weight = HWEIGHT64((u64)(n)), \ + .weight = (w), \ } +#define EVENT_CONSTRAINT(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) @@ -2622,7 +2625,8 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) - EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, + 0, x86_pmu.num_events); pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index ba0fd1eb4af7..25b8b2f33ae9 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -45,19 +45,30 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? 
hweight32(w) : hweight64(w); } -#define HWEIGHT8(w) \ - ( (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ +/* + * Clearly slow versions of the hweightN() functions, their benefit is + * of course compile time evaluation of constant arguments. + */ +#define HWEIGHT8(w) \ + ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \ + (!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ (!!((w) & (1ULL << 7))) ) -#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8)) -#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16)) -#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32)) +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8)) +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16)) +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32)) + +/* + * Type invariant version that simply casts things to the + * largest type. + */ +#define HWEIGHT(w) HWEIGHT64((u64)(w)) /** * rol32 - rotate a 32-bit value left From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 1 Feb 2010 14:50:01 +0200 Subject: [PATCH 252/640] perf_events, x86: Fix bug in hw_perf_enable() We cannot assume that because hwc->idx == assign[i], we can avoid reprogramming the counter in hw_perf_enable(). The event may have been scheduled out and another event may have been programmed into this counter. Thus, we need a more robust way of verifying if the counter still contains config/data related to an event. This patch adds a generation number to each counter on each cpu. Using this mechanism we can verify reliably whether the content of a counter corresponds to an event.
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++------ include/linux/perf_event.h | 2 ++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 96cfc1a4fe9f..a920f173a220 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -90,6 +90,7 @@ struct cpu_hw_events { int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; @@ -1142,6 +1143,8 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->config = ARCH_PERFMON_EVENTSEL_INT; hwc->idx = -1; + hwc->last_cpu = -1; + hwc->last_tag = ~0ULL; /* * Count user and OS events unless requested not to. 
@@ -1457,11 +1460,14 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return n; } - static inline void x86_assign_hw_event(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct cpu_hw_events *cpuc, int i) { - hwc->idx = idx; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = cpuc->assign[i]; + hwc->last_cpu = smp_processor_id(); + hwc->last_tag = ++cpuc->tags[i]; if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { hwc->config_base = 0; @@ -1480,6 +1486,15 @@ static inline void x86_assign_hw_event(struct perf_event *event, } } +static inline int match_prev_assignment(struct hw_perf_event *hwc, + struct cpu_hw_events *cpuc, + int i) +{ + return hwc->idx == cpuc->assign[i] && + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i]; +} + static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); void hw_perf_enable(void) @@ -1508,7 +1523,14 @@ void hw_perf_enable(void) event = cpuc->event_list[i]; hwc = &event->hw; - if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + /* + * we can avoid reprogramming counter if: + * - assigned same counter as last time + * - running on same CPU as last time + * - no other event has used the counter since + */ + if (hwc->idx == -1 || + match_prev_assignment(hwc, cpuc, i)) continue; __x86_pmu_disable(event, cpuc); @@ -1522,12 +1544,12 @@ void hw_perf_enable(void) hwc = &event->hw; if (hwc->idx == -1) { - x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_assign_hw_event(event, cpuc, i); x86_perf_event_set_period(event, hwc, hwc->idx); } /* * need to mark as active because x86_pmu_disable() - * clear active_mask and eventsp[] yet it preserves + * clear active_mask and events[] yet it preserves * idx */ set_bit(hwc->idx, cpuc->active_mask); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 556b0f4a668e..071a7db52549 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -478,9 +478,11 @@ struct 
hw_perf_event { union { struct { /* hardware */ u64 config; + u64 last_tag; unsigned long config_base; unsigned long event_base; int idx; + int last_cpu; }; struct { /* software */ s64 remaining; From f887f3019e56389a73617f4e70f512e82cc89adb Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 4 Feb 2010 16:46:42 +0800 Subject: [PATCH 253/640] perf tools: Clean up O_LARGEFILE et al usage Setting _FILE_OFFSET_BITS and using O_LARGEFILE, lseek64, etc, is redundant. Thanks H. Peter Anvin for pointing it out. So, this patch removes O_LARGEFILE, lseek64, etc. Suggested-by: "H. Peter Anvin" Signed-off-by: Xiao Guangrong Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <4B6A8972.3070605@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +-- tools/perf/util/header.c | 21 ++++++++++----------- tools/perf/util/session.c | 3 +-- tools/perf/util/trace-event-read.c | 20 ++++++++++---------- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 706f00196b87..3ad599b12c91 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -5,7 +5,6 @@ * (or a CPU, or a PID) into the perf.data output file - for * later analysis via perf report. 
*/ -#define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 #include "builtin.h" @@ -451,7 +450,7 @@ static int __cmd_record(int argc, const char **argv) append_file = 0; } - flags = O_CREAT|O_RDWR|O_LARGEFILE; + flags = O_CREAT|O_RDWR; if (append_file) file_new = 0; else diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d5facd5ab1f7..6c9aa16ee51f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,4 +1,3 @@ -#define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 #include @@ -388,7 +387,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) sec_size = sizeof(*feat_sec) * nr_sections; sec_start = self->data_offset + self->data_size; - lseek64(fd, sec_start + sec_size, SEEK_SET); + lseek(fd, sec_start + sec_size, SEEK_SET); if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { struct perf_file_section *trace_sec; @@ -396,9 +395,9 @@ static int perf_header__adds_write(struct perf_header *self, int fd) trace_sec = &feat_sec[idx++]; /* Write trace info */ - trace_sec->offset = lseek64(fd, 0, SEEK_CUR); + trace_sec->offset = lseek(fd, 0, SEEK_CUR); read_tracing_data(fd, attrs, nr_counters); - trace_sec->size = lseek64(fd, 0, SEEK_CUR) - trace_sec->offset; + trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; } @@ -408,18 +407,18 @@ static int perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; /* Write build-ids */ - buildid_sec->offset = lseek64(fd, 0, SEEK_CUR); + buildid_sec->offset = lseek(fd, 0, SEEK_CUR); err = dsos__write_buildid_table(fd); if (err < 0) { pr_debug("failed to write buildid table\n"); goto out_free; } - buildid_sec->size = lseek64(fd, 0, SEEK_CUR) - - buildid_sec->offset; + buildid_sec->size = lseek(fd, 0, SEEK_CUR) - + buildid_sec->offset; dsos__cache_build_ids(); } - lseek64(fd, sec_start, SEEK_SET); + lseek(fd, sec_start, SEEK_SET); err = do_write(fd, feat_sec, sec_size); if (err < 0) pr_debug("failed to write feature section\n"); @@ 
-513,7 +512,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) pr_debug("failed to write perf header\n"); return err; } - lseek64(fd, self->data_offset + self->data_size, SEEK_SET); + lseek(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; return 0; @@ -567,7 +566,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, sec_size = sizeof(*feat_sec) * nr_sections; - lseek64(fd, self->data_offset + self->data_size, SEEK_SET); + lseek(fd, self->data_offset + self->data_size, SEEK_SET); if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) goto out_free; @@ -641,7 +640,7 @@ static int perf_file_section__process(struct perf_file_section *self, struct perf_header *ph, int feat, int fd) { - if (lseek64(fd, self->offset, SEEK_SET) < 0) { + if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %Ld offset for feature %d, " "continuing...\n", self->offset, feat); return 0; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 74cbc64a3a3c..0de7258e70a5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,4 +1,3 @@ -#define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 #include @@ -15,7 +14,7 @@ static int perf_session__open(struct perf_session *self, bool force) { struct stat input_stat; - self->fd = open(self->filename, O_RDONLY|O_LARGEFILE); + self->fd = open(self->filename, O_RDONLY); if (self->fd < 0) { pr_err("failed to open file: %s", self->filename); if (!strcmp(self->filename, "perf.data")) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index ca3c26d466f3..7cd1193918c7 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -18,7 +18,7 @@ * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#define _LARGEFILE64_SOURCE +#define _FILE_OFFSET_BITS 64 #include #include @@ -83,7 +83,7 @@ static char *read_string(void) char *str = 
NULL; int size = 0; int i; - s64 r; + off_t r; for (;;) { r = read(input_fd, buf, BUFSIZ); @@ -117,8 +117,8 @@ static char *read_string(void) i++; /* move the file descriptor to the end of the string */ - r = lseek64(input_fd, -(r - i), SEEK_CUR); - if (r < 0) + r = lseek(input_fd, -(r - i), SEEK_CUR); + if (r == (off_t)-1) die("lseek"); if (str) { @@ -282,8 +282,8 @@ static void update_cpu_data_index(int cpu) static void get_next_page(int cpu) { - off64_t save_seek; - off64_t ret; + off_t save_seek; + off_t ret; if (!cpu_data[cpu].page) return; @@ -298,17 +298,17 @@ static void get_next_page(int cpu) update_cpu_data_index(cpu); /* other parts of the code may expect the pointer to not move */ - save_seek = lseek64(input_fd, 0, SEEK_CUR); + save_seek = lseek(input_fd, 0, SEEK_CUR); - ret = lseek64(input_fd, cpu_data[cpu].offset, SEEK_SET); - if (ret < 0) + ret = lseek(input_fd, cpu_data[cpu].offset, SEEK_SET); + if (ret == (off_t)-1) die("failed to lseek"); ret = read(input_fd, cpu_data[cpu].page, page_size); if (ret < 0) die("failed to read page"); /* reset the file pointer back */ - lseek64(input_fd, save_seek, SEEK_SET); + lseek(input_fd, save_seek, SEEK_SET); return; } From 2161db969313cb94ffd9377a525fb75c3fee9eeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Feb 2010 10:22:01 +0100 Subject: [PATCH 254/640] perf tools: Fix session init on non-modular kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf top and perf record refuses to initialize on non-modular kernels: refuse to initialize: $ perf top -v map_groups__set_modules_path_dir: cannot open /lib/modules/2.6.33-rc6-tip-00586-g398dde3-dirty/ Cc: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265223128-11786-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a60ba2ba1044..6882e9fec2d6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1937,7 +1937,7 @@ int map_groups__create_kernel_maps(struct map_groups *self, return -1; if (symbol_conf.use_modules && map_groups__create_modules(self) < 0) - return -1; + return 0; /* * Now that we have all the maps created, just set the ->end of them: */ From f044ba7835b84e69c68b620ca8fa27e5ef67759d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 4 Feb 2010 08:46:56 +0000 Subject: [PATCH 255/640] Btrfs: fix race between allocate and release extent buffer. Increase extent buffer's reference count while holding the lock. Otherwise it can race with try_release_extent_buffer. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96577e8bf9fd..b177ed319612 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, spin_unlock(&tree->buffer_lock); goto free_eb; } - spin_unlock(&tree->buffer_lock); - /* add one reference for the tree */ atomic_inc(&eb->refs); + spin_unlock(&tree->buffer_lock); return eb; free_eb: From 014e4ac4f7d9c981750491fa40ea35efadc9ed49 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 29 Jan 2010 10:42:11 +0000 Subject: [PATCH 256/640] Btrfs: make error return negative in btrfs_sync_file() It appears the error return should be negative Signed-off-by: Roel Kluin Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ae96fdae1f7d..413a30dafcda 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1133,7 +1133,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_lock(&dentry->d_inode->i_mutex); out: - return ret > 0 ? 
EIO : ret; + return ret > 0 ? -EIO : ret; } static const struct vm_operations_struct btrfs_file_vm_ops = { From d7ce5843bb28ada6845ab2ae8510ba3f12d33154 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 2 Feb 2010 08:46:44 +0000 Subject: [PATCH 257/640] Btrfs: remove BUG_ON() due to mounting bad filesystem Mounting a bad filesystem caused a BUG_ON(). The following is steps to reproduce it. # mkfs.btrfs /dev/sda2 # mount /dev/sda2 /mnt # mkfs.btrfs /dev/sda1 /dev/sda2 (the program says that /dev/sda2 was mounted, and then exits. ) # umount /mnt # mount /dev/sda1 /mnt At the third step, mkfs.btrfs exited in the way of make filesystem. So the initialization of the filesystem didn't finish. So the filesystem was bad, and it caused BUG_ON() when mounting it. But BUG_ON() should be called by the wrong code, not user's operation, so I think it is a bug of btrfs. This patch fixes it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- fs/btrfs/relocation.c | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87b25543d7d1..2b59201b955c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1982,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); - BUG_ON(ret); + if (ret < 0) { + printk(KERN_WARNING + "btrfs: failed to recover relocation\n"); + err = -EINVAL; + goto fail_trans_kthread; + } } location.objectid = BTRFS_FS_TREE_OBJECTID; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ed3e4a2ec2c8..ab7ab5318745 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3764,7 +3764,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); - btrfs_orphan_cleanup(fs_root); + else + btrfs_orphan_cleanup(fs_root); } return err; } From 7a7965f83e89f0be506a96769938a721e4e5ae50 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 1 Feb 
2010 02:41:17 +0000 Subject: [PATCH 258/640] Btrfs: Fix oopsen when dropping empty tree. When dropping an empty tree, walk_down_tree() skips checking extent information for the tree root. This triggers a BUG_ON in walk_up_proc(). Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 432a2da4641e..559f72489b3b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5402,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, int ret; while (level >= 0) { - if (path->slots[level] >= - btrfs_header_nritems(path->nodes[level])) - break; - ret = walk_down_proc(trans, root, path, wc, lookup_info); if (ret > 0) break; @@ -5413,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; + ret = do_walk_down(trans, root, path, wc, &lookup_info); if (ret > 0) { path->slots[level]++; From efd049fb26a162c3830fd3cb1001fdc09b147f3b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Feb 2010 20:50:10 +0000 Subject: [PATCH 259/640] Btrfs: do not try and lookup the file extent when finishing ordered io When running the following fio job [torrent] filename=torrent-test rw=randwrite size=4g filesize=4g bs=4k ioengine=sync you would see long stalls where no work was being done. That is because we were doing all this extra work to read in the file extent outside of the transaction, however in the random io case this ends up hurting us because the file extents are not there to begin with. So axe this logic, since we end up reading in the file extent when we go to update it anyway. This took the fio job from 11 mb/s with several ~10 second stalls to 24 mb/s with a couple of 1-2 second stalls.
Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 46 ++-------------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8cd109972fa6..6782aa19130d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1681,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * before we start the transaction. It limits the amount of btree * reads required while inside the transaction. */ -static noinline void reada_csum(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_ordered_extent *ordered_extent) -{ - struct btrfs_ordered_sum *sum; - u64 bytenr; - - sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, - list); - bytenr = sum->sums[0].bytenr; - - /* - * we don't care about the results, the point of this search is - * just to get the btree leaves into ram - */ - btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); -} - /* as ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. @@ -1709,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct btrfs_path *path; int compressed = 0; int ret; @@ -1717,32 +1698,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) if (!ret) return 0; - /* - * before we join the transaction, try to do some of our IO. - * This will limit the amount of IO that we have to do with - * the transaction running. We're unlikely to need to do any - * IO if the file extents are new, the disk_i_size checks - * covers the most common case. 
- */ - if (start < BTRFS_I(inode)->disk_i_size) { - path = btrfs_alloc_path(); - if (path) { - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - start, 0); - ordered_extent = btrfs_lookup_ordered_extent(inode, - start); - if (!list_empty(&ordered_extent->list)) { - btrfs_release_path(root, path); - reada_csum(root, path, ordered_extent); - } - btrfs_free_path(path); - } - } - - if (!ordered_extent) - ordered_extent = btrfs_lookup_ordered_extent(inode, start); + ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); From 23b5c50945f2294add0137799400329c0ebba290 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Feb 2010 11:33:03 -0500 Subject: [PATCH 260/640] Btrfs: apply updated fallocate i_size fix This version of the i_size fix for fallocate makes sure we only update the i_size when the current fallocate is really operating outside of i_size. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6782aa19130d..4deb280f8969 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5799,7 +5799,9 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > inode->i_size) { + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { + if (cur_offset > actual_len) i_size = actual_len; else From 33c5fd121eabbccc9103daf6cda36941eb3c349f Mon Sep 17 00:00:00 2001 From: David John Date: Wed, 27 Jan 2010 15:19:08 +0530 Subject: [PATCH 261/640] drm/i915: Disable SR when more than one pipe is enabled Self Refresh should be disabled on dual plane configs. 
Otherwise, as the SR watermark is not calculated for such configs, switching to non VGA mode causes FIFO underrun and display flicker. This fixes Korg Bug #14897. Signed-off-by: David John Signed-off-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5f14dfbf715c..12775df1bbfd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2520,6 +2520,10 @@ static void g4x_update_wm(struct drm_device *dev, int planea_clock, sr_entries = roundup(sr_entries / cacheline_size, 1); DRM_DEBUG("self-refresh entries: %d\n", sr_entries); I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); + } else { + /* Turn off self refresh if both pipes are enabled */ + I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) + & ~FW_BLC_SELF_EN); } DRM_DEBUG("Setting FIFO watermarks - A: %d, B: %d, SR %d\n", @@ -2563,6 +2567,10 @@ static void i965_update_wm(struct drm_device *dev, int planea_clock, srwm = 1; srwm &= 0x3f; I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); + } else { + /* Turn off self refresh if both pipes are enabled */ + I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) + & ~FW_BLC_SELF_EN); } DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n", @@ -2631,6 +2639,10 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, if (srwm < 0) srwm = 1; I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN | (srwm & 0x3f)); + } else { + /* Turn off self refresh if both pipes are enabled */ + I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) + & ~FW_BLC_SELF_EN); } DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", From 67026e03244d76b8f1fa725b079d4182fe8910aa Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Tue, 2 Feb 2010 20:09:04 +0100 Subject: [PATCH 262/640] drm/i915: slow acpi_lid_open() causes flickering - V2 acpi_lid_open() could take up 
to 10ms on my computer. Some component is calling the drm GETCONNECTOR ioctl many times in a row. This results in flickering (for example, when starting a video). Fix it by assuming an always connected lid status. Signed-off-by: Thomas Meyer Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_lvds.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 75a9772061cb..b1d0acbae4e4 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -622,6 +622,13 @@ static const struct dmi_system_id bad_lid_status[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"), }, }, + { + .ident = "Aspire 1810T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1810T"), + }, + }, { .ident = "PC-81005", .matches = { @@ -643,7 +650,7 @@ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connect { enum drm_connector_status status = connector_status_connected; - if (!acpi_lid_open() && !dmi_check_system(bad_lid_status)) + if (!dmi_check_system(bad_lid_status) && !acpi_lid_open()) status = connector_status_disconnected; return status; From 93533c291a0af78ca57115fc44d2e6c4c9517cd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 31 Jan 2010 10:40:48 +0000 Subject: [PATCH 263/640] drm/i915: Fix leak of relocs along do_execbuffer error path Following a gpu hang, we would leak the relocation buffer. So simply rearrange the error path to always free the relocation buffer.
Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1ef7ec4f38fe..be0fd1a63321 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3584,6 +3584,9 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t reloc_count = 0, i; int ret = 0; + if (relocs == NULL) + return 0; + for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; int unwritten; @@ -3673,7 +3676,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; - struct drm_i915_gem_relocation_entry *relocs; + struct drm_i915_gem_relocation_entry *relocs = NULL; int ret = 0, ret2, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; @@ -3950,6 +3953,7 @@ err: mutex_unlock(&dev->struct_mutex); +pre_mutex_err: /* Copy the updated relocations out regardless of current error * state. Failure to update the relocs would mean that the next * time userland calls execbuf, it would do so with presumed offset @@ -3964,7 +3968,6 @@ err: ret = ret2; } -pre_mutex_err: drm_free_large(object_list); kfree(cliprects); From feeb2721a7a0bd0cfa5b8847f80aec93aa2cc00d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 3 Feb 2010 21:59:51 +0000 Subject: [PATCH 264/640] igb: make certain to reassign legacy interrupt vectors after reset This change corrects an issue that will cause false hangs when using either 82575 or 82580 in legacy interrupt mode. The issue is caused when there is a slow traffic flow and an "ethtool -r" is executed while using legacy or MSI interrupts. MSI-X is not affected by this issue due to the fact that we were already reconfiguring the vectors after reset. 
If possible it would be best to push this for net-2.6 since it is resolving a bug but if that is not possible then net-next-2.6 will be fine. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/igb_main.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 997124d2992a..c881347cb26d 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -421,6 +421,8 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; if (tx_queue > IGB_N0_QUEUE) msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; + if (!adapter->msix_entries && msix_vector == 0) + msixbm |= E1000_EIMS_OTHER; array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); q_vector->eims_value = msixbm; break; @@ -877,7 +879,6 @@ static int igb_request_irq(struct igb_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; int err = 0; if (adapter->msix_entries) { @@ -909,20 +910,7 @@ static int igb_request_irq(struct igb_adapter *adapter) igb_setup_all_tx_resources(adapter); igb_setup_all_rx_resources(adapter); } else { - switch (hw->mac.type) { - case e1000_82575: - wr32(E1000_MSIXBM(0), - (E1000_EICR_RX_QUEUE0 | - E1000_EICR_TX_QUEUE0 | - E1000_EIMS_OTHER)); - break; - case e1000_82580: - case e1000_82576: - wr32(E1000_IVAR0, E1000_IVAR_VALID); - break; - default: - break; - } + igb_assign_vector(adapter->q_vector[0], 0); } if (adapter->flags & IGB_FLAG_HAS_MSI) { @@ -1140,6 +1128,8 @@ int igb_up(struct igb_adapter *adapter) } if (adapter->msix_entries) igb_configure_msix(adapter); + else + igb_assign_vector(adapter->q_vector[0], 0); /* Clear any pending interrupts. 
*/ rd32(E1000_ICR); From 21956b61f594f7924d98240da74bc81c28601fa9 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 2 Feb 2010 19:58:25 +0100 Subject: [PATCH 265/640] ALSA: ctxfi - fix PTP address initialization After hours of debugging, I finally found the reason why some source and runtime combination does not work. The PTP (page table pages) address must be aligned. I am not sure how much, but alignment to PAGE_SIZE is sufficient. Also, use ALSA's page allocation routines to ensure proper virtual -> physical address translation. Cc: Signed-off-by: Jaroslav Kysela --- sound/pci/ctxfi/ctatc.c | 15 ++------------- sound/pci/ctxfi/ctvmem.c | 38 ++++++++++++++++++-------------------- sound/pci/ctxfi/ctvmem.h | 8 +++++--- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index cb65bd0dd35b..459c1f62783b 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -166,18 +166,7 @@ static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm) static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index) { - struct ct_vm *vm; - void *kvirt_addr; - unsigned long phys_addr; - - vm = atc->vm; - kvirt_addr = vm->get_ptp_virt(vm, index); - if (kvirt_addr == NULL) - phys_addr = (~0UL); - else - phys_addr = virt_to_phys(kvirt_addr); - - return phys_addr; + return atc->vm->get_ptp_phys(atc->vm, index); } static unsigned int convert_format(snd_pcm_format_t snd_format) @@ -1669,7 +1658,7 @@ int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci, } /* Set up device virtual memory management object */ - err = ct_vm_create(&atc->vm); + err = ct_vm_create(&atc->vm, pci); if (err < 0) goto error1; diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c index 6b78752e9503..65da6e466f80 100644 --- a/sound/pci/ctxfi/ctvmem.c +++ b/sound/pci/ctxfi/ctvmem.c @@ -138,7 +138,7 @@ ct_vm_map(struct ct_vm *vm, struct snd_pcm_substream *substream, int size) return 
NULL; } - ptp = vm->ptp[0]; + ptp = (unsigned long *)vm->ptp[0].area; pte_start = (block->addr >> CT_PAGE_SHIFT); pages = block->size >> CT_PAGE_SHIFT; for (i = 0; i < pages; i++) { @@ -158,25 +158,25 @@ static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block) } /* * - * return the host (kmalloced) addr of the @index-th device - * page talbe page on success, or NULL on failure. - * The first returned NULL indicates the termination. + * return the host physical addr of the @index-th device + * page table page on success, or ~0UL on failure. + * The first returned ~0UL indicates the termination. * */ -static void * -ct_get_ptp_virt(struct ct_vm *vm, int index) +static dma_addr_t +ct_get_ptp_phys(struct ct_vm *vm, int index) { - void *addr; + dma_addr_t addr; - addr = (index >= CT_PTP_NUM) ? NULL : vm->ptp[index]; + addr = (index >= CT_PTP_NUM) ? ~0UL : vm->ptp[index].addr; return addr; } -int ct_vm_create(struct ct_vm **rvm) +int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci) { struct ct_vm *vm; struct ct_vm_block *block; - int i; + int i, err = 0; *rvm = NULL; @@ -188,23 +188,21 @@ int ct_vm_create(struct ct_vm **rvm) /* Allocate page table pages */ for (i = 0; i < CT_PTP_NUM; i++) { - vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vm->ptp[i]) + err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, + snd_dma_pci_data(pci), + PAGE_SIZE, &vm->ptp[i]); + if (err < 0) break; } - if (!i) { + if (err < 0) { /* no page table pages are allocated */ - kfree(vm); + ct_vm_destroy(vm); return -ENOMEM; } vm->size = CT_ADDRS_PER_PAGE * i; - /* Initialise remaining ptps */ - for (; i < CT_PTP_NUM; i++) - vm->ptp[i] = NULL; - vm->map = ct_vm_map; vm->unmap = ct_vm_unmap; - vm->get_ptp_virt = ct_get_ptp_virt; + vm->get_ptp_phys = ct_get_ptp_phys; INIT_LIST_HEAD(&vm->unused); INIT_LIST_HEAD(&vm->used); block = kzalloc(sizeof(*block), GFP_KERNEL); @@ -242,7 +240,7 @@ void ct_vm_destroy(struct ct_vm *vm) /* free allocated page table pages */ for (i = 0; i < CT_PTP_NUM; 
i++) - kfree(vm->ptp[i]); + snd_dma_free_pages(&vm->ptp[i]); vm->size = 0; diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h index 01e4fd0386a3..b23adfca4de6 100644 --- a/sound/pci/ctxfi/ctvmem.h +++ b/sound/pci/ctxfi/ctvmem.h @@ -22,6 +22,8 @@ #include #include +#include +#include /* The chip can handle the page table of 4k pages * (emu20k1 can handle even 8k pages, but we don't use it right now) @@ -41,7 +43,7 @@ struct snd_pcm_substream; /* Virtual memory management object for card device */ struct ct_vm { - void *ptp[CT_PTP_NUM]; /* Device page table pages */ + struct snd_dma_buffer ptp[CT_PTP_NUM]; /* Device page table pages */ unsigned int size; /* Available addr space in bytes */ struct list_head unused; /* List of unused blocks */ struct list_head used; /* List of used blocks */ @@ -52,10 +54,10 @@ struct ct_vm { int size); /* Unmap device logical addr area. */ void (*unmap)(struct ct_vm *, struct ct_vm_block *block); - void *(*get_ptp_virt)(struct ct_vm *vm, int index); + dma_addr_t (*get_ptp_phys)(struct ct_vm *vm, int index); }; -int ct_vm_create(struct ct_vm **rvm); +int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci); void ct_vm_destroy(struct ct_vm *vm); #endif /* CTVMEM_H */ From 1b3f720bf033fde1fbb6231f9b156b918c5f68d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2010 14:00:41 -0800 Subject: [PATCH 266/640] pktgen: Fix freezing problem Add missing try_to_freeze() to one of the pktgen_thread_worker() code paths so that it doesn't block suspend/hibernation. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=15006 Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Ciprian Dorin Craciun Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de0c2c726420..2e692afdc55d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3524,6 +3524,7 @@ static int pktgen_thread_worker(void *arg) wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); + try_to_freeze(); continue; } From 6f14a668f1a8b715a6e855f4e32705e54a6e86a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Feb 2010 17:57:37 +0900 Subject: [PATCH 267/640] idr: revert misallocation bug fix Commit 859ddf09743a8cc680af33f7259ccd0fd36bfe9d tried to fix misallocation bug but broke full bit marking by not clearing pa[idp->layers] and also is causing X failures due to lookup failure in drm code. The cause of the latter hasn't been found yet. Revert the fix for now. Signed-off-by: Tejun Heo Signed-off-by: Linus Torvalds --- lib/idr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index ba7d37cf7847..1cac726c44bc 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -140,7 +140,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = *starting_id; restart: p = idp->top; - l = p->layer; + l = idp->layers; + pa[l--] = NULL; while (1) { /* * We run around this while until we reach the leaf node... @@ -154,8 +155,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) oid = id; id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; - /* did id go over the limit? 
*/ - if (id >= (1 << (idp->layers * IDR_BITS))) { + /* if already at the top layer, we need to grow */ + if (!(p = pa[l])) { *starting_id = id; return IDR_NEED_TO_GROW; } From c38c7b64a2747a211c3a6e8e5919ee25ccd474e7 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 4 Feb 2010 17:27:27 +0100 Subject: [PATCH 268/640] drm/radeon/kms: move blit initialization after we disabled VGA VGA might be overwriting VRAM and corrupting our blit shader, leading to corruption; it likely won't happen if you load fbcon right after radeon. Thanks to Shawn Starr and Andre Maasikas for tracking down this issue. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 12 ++++++------ drivers/gpu/drm/radeon/rv770.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index b833b4b97162..8c94040cb7f2 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1861,6 +1861,12 @@ int r600_startup(struct radeon_device *rdev) return r; } r600_gpu_init(rdev); + r = r600_blit_init(rdev); + if (r) { + r600_blit_fini(rdev); + rdev->asic->copy = NULL; + dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); + } /* pin copy shader into vram */ if (rdev->r600_blit.shader_obj) { r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); @@ -2045,12 +2051,6 @@ int r600_init(struct radeon_device *rdev) r = r600_pcie_gart_init(rdev); if (r) return r; - r = r600_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } rdev->accel_working = true; r = r600_startup(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 2d465768ac71..06b037eebef5 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -887,6 +887,12 @@ static int rv770_startup(struct radeon_device *rdev) return r; }
rv770_gpu_init(rdev); + r = r600_blit_init(rdev); + if (r) { + r600_blit_fini(rdev); + rdev->asic->copy = NULL; + dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); + } /* pin copy shader into vram */ if (rdev->r600_blit.shader_obj) { r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); @@ -1055,12 +1061,6 @@ int rv770_init(struct radeon_device *rdev) r = r600_pcie_gart_init(rdev); if (r) return r; - r = r600_blit_init(rdev); - if (r) { - r600_blit_fini(rdev); - rdev->asic->copy = NULL; - dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); - } rdev->accel_working = true; r = rv770_startup(rdev); From 062b389c8704e539e234cfd67c7e034a514f50bf Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 4 Feb 2010 20:36:39 +0100 Subject: [PATCH 269/640] drm/radeon/kms: fix regression rendering issue on R6XX/R7XX It seems that some R6XX/R7XX silently ignore an HDP flush when it is programmed through the ring; this patch adds back an ioctl callback to allow R6XX/R7XX hw to perform such a flush through MMIO in order to fix a regression.
For more details see: http://bugzilla.kernel.org/show_bug.cgi?id=15186 Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 15 +++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 7 +++++++ drivers/gpu/drm/radeon/radeon_asic.h | 11 +++++++++++ drivers/gpu/drm/radeon/radeon_gem.c | 3 +++ 4 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 8c94040cb7f2..45d565bae71b 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2900,3 +2900,18 @@ int r600_debugfs_mc_info_init(struct radeon_device *rdev) return 0; #endif } + +/** + * r600_ioctl_wait_idle - flush host path cache on wait idle ioctl + * rdev: radeon device structure + * bo: buffer object struct which userspace is waiting for idle + * + * Some R6XX/R7XX doesn't seems to take into account HDP flush performed + * through ring buffer, this leads to corruption in rendering, see + * http://bugzilla.kernel.org/show_bug.cgi?id=15186 to avoid this we + * directly perform HDP flush by writing register through MMIO. + */ +void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo) +{ + WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); +} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2d5f2bfa7201..37150fc406b5 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -661,6 +661,13 @@ struct radeon_asic { void (*hpd_fini)(struct radeon_device *rdev); bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd); void (*hpd_set_polarity)(struct radeon_device *rdev, enum radeon_hpd_id hpd); + /* ioctl hw specific callback. Some hw might want to perform special + * operation on specific ioctl. For instance on wait idle some hw + * might want to perform and HDP flush through MMIO as it seems that + * some R6XX/R7XX hw doesn't take HDP flush into account if programmed + * through ring. 
+ */ + void (*ioctl_wait_idle)(struct radeon_device *rdev, struct radeon_bo *bo); }; /* diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f2fbd2e4e9df..05ee1aeac3fd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -117,6 +117,7 @@ static struct radeon_asic r100_asic = { .hpd_fini = &r100_hpd_fini, .hpd_sense = &r100_hpd_sense, .hpd_set_polarity = &r100_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -176,6 +177,7 @@ static struct radeon_asic r300_asic = { .hpd_fini = &r100_hpd_fini, .hpd_sense = &r100_hpd_sense, .hpd_set_polarity = &r100_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; /* @@ -219,6 +221,7 @@ static struct radeon_asic r420_asic = { .hpd_fini = &r100_hpd_fini, .hpd_sense = &r100_hpd_sense, .hpd_set_polarity = &r100_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -267,6 +270,7 @@ static struct radeon_asic rs400_asic = { .hpd_fini = &r100_hpd_fini, .hpd_sense = &r100_hpd_sense, .hpd_set_polarity = &r100_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -323,6 +327,7 @@ static struct radeon_asic rs600_asic = { .hpd_fini = &rs600_hpd_fini, .hpd_sense = &rs600_hpd_sense, .hpd_set_polarity = &rs600_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -370,6 +375,7 @@ static struct radeon_asic rs690_asic = { .hpd_fini = &rs600_hpd_fini, .hpd_sense = &rs600_hpd_sense, .hpd_set_polarity = &rs600_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -421,6 +427,7 @@ static struct radeon_asic rv515_asic = { .hpd_fini = &rs600_hpd_fini, .hpd_sense = &rs600_hpd_sense, .hpd_set_polarity = &rs600_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; @@ -463,6 +470,7 @@ static struct radeon_asic r520_asic = { .hpd_fini = &rs600_hpd_fini, .hpd_sense = &rs600_hpd_sense, .hpd_set_polarity = &rs600_hpd_set_polarity, + .ioctl_wait_idle = NULL, }; /* @@ -504,6 +512,7 @@ void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id 
hpd); void r600_hpd_set_polarity(struct radeon_device *rdev, enum radeon_hpd_id hpd); +extern void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo); static struct radeon_asic r600_asic = { .init = &r600_init, @@ -538,6 +547,7 @@ static struct radeon_asic r600_asic = { .hpd_fini = &r600_hpd_fini, .hpd_sense = &r600_hpd_sense, .hpd_set_polarity = &r600_hpd_set_polarity, + .ioctl_wait_idle = r600_ioctl_wait_idle, }; /* @@ -582,6 +592,7 @@ static struct radeon_asic rv770_asic = { .hpd_fini = &r600_hpd_fini, .hpd_sense = &r600_hpd_sense, .hpd_set_polarity = &r600_hpd_set_polarity, + .ioctl_wait_idle = r600_ioctl_wait_idle, }; #endif diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 0e1325e18534..db8e9a355a01 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -308,6 +308,9 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } robj = gobj->driver_private; r = radeon_bo_wait(robj, NULL, false); + /* callback hw specific functions if any */ + if (robj->rdev->asic->ioctl_wait_idle) + robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj); mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(gobj); mutex_unlock(&dev->struct_mutex); From 9e5b2af75abc67c13005c706cf95bbbb78f7fddc Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Thu, 4 Feb 2010 19:20:53 +0200 Subject: [PATCH 270/640] drm/r100/kms: Emit cache flush to the end of command buffer. (v2) Cache flush is required in case CPU is accessing rendered data. This fixes glean/readPixSanity test case and random rendering errors in sauerbraten and warzone2100. v2 Fix comment ordering in r100_fence_ring_emit and remove extra defines added in first version. 
Signed-off-by: Pauli Nieminen Reviewed-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 11c9a3fe6810..626e79023e30 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -354,11 +354,17 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) return RREG32(RADEON_CRTC2_CRNT_FRAME); } +/* Who ever call radeon_fence_emit should call ring_lock and ask + * for enough space (today caller are ib schedule and buffer move) */ void r100_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { - /* Who ever call radeon_fence_emit should call ring_lock and ask - * for enough space (today caller are ib schedule and buffer move) */ + /* We have to make sure that caches are flushed before + * CPU might read something from VRAM. */ + radeon_ring_write(rdev, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); + radeon_ring_write(rdev, RADEON_RB3D_DC_FLUSH_ALL); + radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); + radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL); /* Wait until IDLE & CLEAN */ radeon_ring_write(rdev, PACKET0(0x1720, 0)); radeon_ring_write(rdev, (1 << 16) | (1 << 17)); From d7748bacbbee80b2cc4b690a74d5db2cd84acd7b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 2 Feb 2010 14:40:33 -0800 Subject: [PATCH 271/640] ati_pcigart: fix printk format warning Fix ati_pcigart printk format warning: drivers/gpu/drm/ati_pcigart.c:115: warning: format '%Lx' expects type 'long long unsigned int', but argument 3 has type 'dma_addr_t' Signed-off-by: Randy Dunlap Cc: Zhenyu Wang Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- drivers/gpu/drm/ati_pcigart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c index a1fce68e3bbe..17be051b7aa3 
100644 --- a/drivers/gpu/drm/ati_pcigart.c +++ b/drivers/gpu/drm/ati_pcigart.c @@ -113,7 +113,7 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) { DRM_ERROR("fail to set dma mask to 0x%Lx\n", - gart_info->table_mask); + (unsigned long long)gart_info->table_mask); ret = 1; goto done; } From 94cf6434a1bc5958d5da3be62f1272792dada2bf Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 2 Feb 2010 14:40:29 -0800 Subject: [PATCH 272/640] drivers/gpu/drm/radeon/radeon_combios.c: fix warning drivers/gpu/drm/radeon/radeon_combios.c: In function 'radeon_combios_get_lvds_info': drivers/gpu/drm/radeon/radeon_combios.c:893: warning: comparison is always false due to limited range of data type Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_combios.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 579c8920e081..e7b19440102e 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -971,8 +971,7 @@ struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder lvds->native_mode.vdisplay); lvds->panel_vcc_delay = RBIOS16(lcd_info + 0x2c); - if (lvds->panel_vcc_delay > 2000 || lvds->panel_vcc_delay < 0) - lvds->panel_vcc_delay = 2000; + lvds->panel_vcc_delay = min_t(u16, lvds->panel_vcc_delay, 2000); lvds->panel_pwr_delay = RBIOS8(lcd_info + 0x24); lvds->panel_digon_delay = RBIOS16(lcd_info + 0x38) & 0xf; From 655efd3dc92cd0d37292157178d33deb0430aeaa Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 2 Feb 2010 11:51:45 +0100 Subject: [PATCH 273/640] drm/radeon/kms: don't call suspend path before cleaning up GPU In suspend path we unmap the GART table while in cleaning up path we will unbind buffer and thus try to write to unmapped GART leading to oops. 
In order to avoid this we don't call the suspend path in the cleanup path. The cleanup path is clever enough to deactivate the GPU like the suspend path does, thus this was redundant. Tested on: RV370, R420, RV515, RV570, RV610, RV770 (all PCIE) Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 4 +--- drivers/gpu/drm/radeon/r300.c | 5 ++--- drivers/gpu/drm/radeon/r420.c | 3 +-- drivers/gpu/drm/radeon/r520.c | 3 +-- drivers/gpu/drm/radeon/r600.c | 21 +++++++++++++-------- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/rs400.c | 2 -- drivers/gpu/drm/radeon/rs600.c | 2 -- drivers/gpu/drm/radeon/rs690.c | 2 -- drivers/gpu/drm/radeon/rv515.c | 4 +--- drivers/gpu/drm/radeon/rv770.c | 12 ++++++------ 11 files changed, 26 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 626e79023e30..c0d4650cdb79 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3375,7 +3375,6 @@ int r100_suspend(struct radeon_device *rdev) void r100_fini(struct radeon_device *rdev) { - r100_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@ -3487,13 +3486,12 @@ int r100_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - r100_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); - radeon_irq_kms_fini(rdev); rdev->accel_working = false; } return 0; diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 0051d11b907c..e699e6d053dd 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1327,7 +1327,6 @@ int r300_suspend(struct radeon_device *rdev) void r300_fini(struct radeon_device *rdev) { - r300_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@
-1418,15 +1417,15 @@ int r300_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - r300_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); - radeon_irq_kms_fini(rdev); + radeon_agp_fini(rdev); rdev->accel_working = false; } return 0; diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 4526faaacca8..d9373246c97f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -389,16 +389,15 @@ int r420_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - r420_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); radeon_agp_fini(rdev); - radeon_irq_kms_fini(rdev); rdev->accel_working = false; } return 0; diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 9a189072f2b9..ddf5731eba0d 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -294,13 +294,12 @@ int r520_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - rv515_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); radeon_agp_fini(rdev); - radeon_irq_kms_fini(rdev); rdev->accel_working = false; } return 0; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 45d565bae71b..9661a469f3bd 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ 
-1654,6 +1654,12 @@ void r600_ring_init(struct radeon_device *rdev, unsigned ring_size) rdev->cp.align_mask = 16 - 1; } +void r600_cp_fini(struct radeon_device *rdev) +{ + r600_cp_stop(rdev); + radeon_ring_fini(rdev); +} + /* * GPU scratch registers helpers function. @@ -2055,9 +2061,11 @@ int r600_init(struct radeon_device *rdev) rdev->accel_working = true; r = r600_startup(rdev); if (r) { - r600_suspend(rdev); + dev_err(rdev->dev, "disabling GPU acceleration\n"); + r600_cp_fini(rdev); r600_wb_fini(rdev); - radeon_ring_fini(rdev); + r600_irq_fini(rdev); + radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); rdev->accel_working = false; } @@ -2083,20 +2091,17 @@ int r600_init(struct radeon_device *rdev) void r600_fini(struct radeon_device *rdev) { - /* Suspend operations */ - r600_suspend(rdev); - r600_audio_fini(rdev); r600_blit_fini(rdev); + r600_cp_fini(rdev); + r600_wb_fini(rdev); r600_irq_fini(rdev); radeon_irq_kms_fini(rdev); - radeon_ring_fini(rdev); - r600_wb_fini(rdev); r600_pcie_gart_fini(rdev); + radeon_agp_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); radeon_clocks_fini(rdev); - radeon_agp_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); kfree(rdev->bios); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 37150fc406b5..f57480ba1355 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1150,6 +1150,7 @@ extern bool r600_card_posted(struct radeon_device *rdev); extern void r600_cp_stop(struct radeon_device *rdev); extern void r600_ring_init(struct radeon_device *rdev, unsigned ring_size); extern int r600_cp_resume(struct radeon_device *rdev); +extern void r600_cp_fini(struct radeon_device *rdev); extern int r600_count_pipe_bits(uint32_t val); extern int r600_gart_clear_page(struct radeon_device *rdev, int i); extern int r600_mc_wait_for_idle(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index 
9f5418983e2a..eeeb0d6d7a44 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -448,7 +448,6 @@ int rs400_suspend(struct radeon_device *rdev) void rs400_fini(struct radeon_device *rdev) { - rs400_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@ -527,7 +526,6 @@ int rs400_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - rs400_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index d5255751e7b3..c3818562a13e 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -610,7 +610,6 @@ int rs600_suspend(struct radeon_device *rdev) void rs600_fini(struct radeon_device *rdev) { - rs600_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@ -689,7 +688,6 @@ int rs600_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - rs600_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index cd31da913771..06e2771aee5a 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -676,7 +676,6 @@ int rs690_suspend(struct radeon_device *rdev) void rs690_fini(struct radeon_device *rdev) { - rs690_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@ -756,7 +755,6 @@ int rs690_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - rs690_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 62756717b044..0e1e6b8632b8 100644 --- 
a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -537,7 +537,6 @@ void rv515_set_safe_registers(struct radeon_device *rdev) void rv515_fini(struct radeon_device *rdev) { - rv515_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); @@ -615,13 +614,12 @@ int rv515_init(struct radeon_device *rdev) if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); - rv515_suspend(rdev); r100_cp_fini(rdev); r100_wb_fini(rdev); r100_ib_fini(rdev); + radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); radeon_agp_fini(rdev); - radeon_irq_kms_fini(rdev); rdev->accel_working = false; } return 0; diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 06b037eebef5..c9320e727986 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1065,9 +1065,11 @@ int rv770_init(struct radeon_device *rdev) rdev->accel_working = true; r = rv770_startup(rdev); if (r) { - rv770_suspend(rdev); + dev_err(rdev->dev, "disabling GPU acceleration\n"); + r600_cp_fini(rdev); r600_wb_fini(rdev); - radeon_ring_fini(rdev); + r600_irq_fini(rdev); + radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); rdev->accel_working = false; } @@ -1089,13 +1091,11 @@ int rv770_init(struct radeon_device *rdev) void rv770_fini(struct radeon_device *rdev) { - rv770_suspend(rdev); - r600_blit_fini(rdev); + r600_cp_fini(rdev); + r600_wb_fini(rdev); r600_irq_fini(rdev); radeon_irq_kms_fini(rdev); - radeon_ring_fini(rdev); - r600_wb_fini(rdev); rv770_pcie_gart_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); From 23fff28a9b0529869bffef8aab0d3f350dd3b5a4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Feb 2010 11:57:42 +1000 Subject: [PATCH 274/640] drm/radeon/kms: disable HDMI audio for now on rv710/rv730 Support isn't correct yet and we are getting green tinges on the displays. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index 99e2c3891a7d..b1c1d3433454 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -35,7 +35,7 @@ */ static int r600_audio_chipset_supported(struct radeon_device *rdev) { - return rdev->family >= CHIP_R600 + return (rdev->family >= CHIP_R600 && rdev->family < CHIP_RV710) || rdev->family == CHIP_RS600 || rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740; From 624ab4f87e99f10ea3b45e76039c477fd4d7a7e6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 27 Jan 2010 16:07:15 +1000 Subject: [PATCH 275/640] drm/radeon/kms: make initial state of load detect property correct. this was incorrect on my rs480. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 55266416fa47..2d8e5a70f284 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1343,7 +1343,7 @@ radeon_add_legacy_connector(struct drm_device *dev, radeon_connector->dac_load_detect = false; drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.load_detect_property, - 1); + radeon_connector->dac_load_detect); drm_connector_attach_property(&radeon_connector->base, rdev->mode_info.tv_std_property, radeon_combios_get_tv_info(rdev)); From 2717568e7c44fe7dc3f4f52ea823811cfeede2b5 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 4 Feb 2010 06:57:58 +0000 Subject: [PATCH 276/640] usb: r8a66597-hcd: Flush the D-cache for the pipe-in transfer buffers. 
This implements the same D-cache flushing logic for r8a66597-hcd as Catalin's isp1760 (http://patchwork.kernel.org/patch/76391/) change, with the same note applying here as well: When the HDC driver writes the data to the transfer buffers it pollutes the D-cache (unlike DMA drivers where the device writes the data). If the corresponding pages get mapped into user space, there are no additional cache flushing operations performed and this causes random user space faults on architectures with separate I and D caches (Harvard) or those with aliasing D-cache. This fixes up crashes during USB boot on SH7724 and others: http://marc.info/?l=linux-sh&m=126439837308912&w=2 Reported-by: Goda Yusuke Tested-by: Goda Yusuke Cc: stable@kernel.org Signed-off-by: Paul Mundt Acked-by: Yoshihiro Shimoda --- drivers/usb/host/r8a66597-hcd.c | 37 ++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 0ceec123ddfd..50a3e2d6a6cc 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include #include "../core/hcd.h" #include "r8a66597.h" @@ -820,6 +822,26 @@ static void enable_r8a66597_pipe(struct r8a66597 *r8a66597, struct urb *urb, enable_r8a66597_pipe_dma(r8a66597, dev, pipe, urb); } +static void r8a66597_urb_done(struct r8a66597 *r8a66597, struct urb *urb, + int status) +__releases(r8a66597->lock) +__acquires(r8a66597->lock) +{ + if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) != PIPE_CONTROL) { + void *ptr; + + for (ptr = urb->transfer_buffer; + ptr < urb->transfer_buffer + urb->transfer_buffer_length; + ptr += PAGE_SIZE) + flush_dcache_page(virt_to_page(ptr)); + } + + usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597), urb); + spin_unlock(&r8a66597->lock); + usb_hcd_giveback_urb(r8a66597_to_hcd(r8a66597), urb, status); + spin_lock(&r8a66597->lock); +} + /* this function 
must be called with interrupt disabled */ static void force_dequeue(struct r8a66597 *r8a66597, u16 pipenum, u16 address) { @@ -838,15 +860,9 @@ static void force_dequeue(struct r8a66597 *r8a66597, u16 pipenum, u16 address) list_del(&td->queue); kfree(td); - if (urb) { - usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597), - urb); + if (urb) + r8a66597_urb_done(r8a66597, urb, -ENODEV); - spin_unlock(&r8a66597->lock); - usb_hcd_giveback_urb(r8a66597_to_hcd(r8a66597), urb, - -ENODEV); - spin_lock(&r8a66597->lock); - } break; } } @@ -1283,10 +1299,7 @@ __releases(r8a66597->lock) __acquires(r8a66597->lock) if (usb_pipeisoc(urb->pipe)) urb->start_frame = r8a66597_get_frame(hcd); - usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597), urb); - spin_unlock(&r8a66597->lock); - usb_hcd_giveback_urb(hcd, urb, status); - spin_lock(&r8a66597->lock); + r8a66597_urb_done(r8a66597, urb, status); } if (restart) { From 2c940db250c1610d95ea5331dc819b8bd4db96ae Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 4 Feb 2010 06:58:28 +0000 Subject: [PATCH 277/640] usb: r8a66597-hcd: Fix up spinlock recursion in root hub polling. The current root hub polling code exhibits a spinlock recursion on the private controller lock. r8a66597_root_hub_control() is called from r8a66597_timer() which grabs the lock and disables IRQs. The following chain emerges: r8a66597_timer() <-- lock taken r8a66597_root_hub_control() r8a66597_check_syssts() usb_hcd_poll_rh_status() <-- acquires the same lock /* insert death here */ The entire chain requires IRQs to be disabled, so we just unlock and relock around the call to usb_hcd_poll_rh_status() while leaving the IRQ state unchanged. 
Signed-off-by: Paul Mundt Acked-by: Yoshihiro Shimoda --- drivers/usb/host/r8a66597-hcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 50a3e2d6a6cc..bee558aed427 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1022,6 +1022,8 @@ static void start_root_hub_sampling(struct r8a66597 *r8a66597, int port, /* this function must be called with interrupt disabled */ static void r8a66597_check_syssts(struct r8a66597 *r8a66597, int port, u16 syssts) +__releases(r8a66597->lock) +__acquires(r8a66597->lock) { if (syssts == SE0) { r8a66597_write(r8a66597, ~ATTCH, get_intsts_reg(port)); @@ -1039,7 +1041,9 @@ static void r8a66597_check_syssts(struct r8a66597 *r8a66597, int port, usb_hcd_resume_root_hub(r8a66597_to_hcd(r8a66597)); } + spin_unlock(&r8a66597->lock); usb_hcd_poll_rh_status(r8a66597_to_hcd(r8a66597)); + spin_lock(&r8a66597->lock); } /* this function must be called with interrupt disabled */ From a17538f93c16f0e15e35dc31eedad87e2d9c5c26 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Feb 2010 13:41:54 +1000 Subject: [PATCH 278/640] drm/radeon/kms: rs400/480 MC setup is different than r300. Boot testing on my rs480 laptop found the MC idle never happened on startup, a quick check with AMD found the idle bit is in a different place on the rs4xx than r300. Implement a new rs400 mc idle function to fix this. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/rs400.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index eeeb0d6d7a44..287fcebfb4e6 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -223,15 +223,31 @@ int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) return 0; } +int rs400_mc_wait_for_idle(struct radeon_device *rdev) +{ + unsigned i; + uint32_t tmp; + + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(0x0150); + if (tmp & (1 << 2)) { + return 0; + } + DRM_UDELAY(1); + } + return -1; +} + void rs400_gpu_init(struct radeon_device *rdev) { /* FIXME: HDP same place on rs400 ? */ r100_hdp_reset(rdev); /* FIXME: is this correct ? */ r420_pipes_init(rdev); - if (r300_mc_wait_for_idle(rdev)) { - printk(KERN_WARNING "Failed to wait MC idle while " - "programming pipes. Bad things might happen.\n"); + if (rs400_mc_wait_for_idle(rdev)) { + printk(KERN_WARNING "rs400: Failed to wait MC idle while " + "programming pipes. Bad things might happen. %08x\n", RREG32(0x150)); } } @@ -370,8 +386,8 @@ void rs400_mc_program(struct radeon_device *rdev) r100_mc_stop(rdev, &save); /* Wait for mc idle */ - if (r300_mc_wait_for_idle(rdev)) - dev_warn(rdev->dev, "Wait MC idle timeout before updating MC.\n"); + if (rs400_mc_wait_for_idle(rdev)) + dev_warn(rdev->dev, "rs400: Wait MC idle timeout before updating MC.\n"); WREG32(R_000148_MC_FB_LOCATION, S_000148_MC_FB_START(rdev->mc.vram_start >> 16) | S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16)); From 5ff55717674470b96562f931f778c878080755b7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Feb 2010 13:57:03 +1000 Subject: [PATCH 279/640] drm/radeon/kms: fix r300 vram width calculations This was incorrect according to the docs and the UMS driver does it like this. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index e699e6d053dd..43b55a030b4d 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -506,11 +506,14 @@ void r300_vram_info(struct radeon_device *rdev) /* DDR for all card after R300 & IGP */ rdev->mc.vram_is_ddr = true; + tmp = RREG32(RADEON_MEM_CNTL); - if (tmp & R300_MEM_NUM_CHANNELS_MASK) { - rdev->mc.vram_width = 128; - } else { - rdev->mc.vram_width = 64; + tmp &= R300_MEM_NUM_CHANNELS_MASK; + switch (tmp) { + case 0: rdev->mc.vram_width = 64; break; + case 1: rdev->mc.vram_width = 128; break; + case 2: rdev->mc.vram_width = 256; break; + default: rdev->mc.vram_width = 128; break; } r100_vram_init_sizes(rdev); From 38fd2c6ff526e6a59edfa8e08f6f0724646784c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 28 Jan 2010 18:16:30 +0100 Subject: [PATCH 280/640] drm/radeon/kms: suspend and resume audio stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes FDO bug #26214 Signed-off-by: Rafał Miłecki Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 8 ++++++++ drivers/gpu/drm/radeon/r600_audio.c | 3 +-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 9661a469f3bd..29faebc105df 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1950,6 +1950,13 @@ int r600_resume(struct radeon_device *rdev) DRM_ERROR("radeon: failled testing IB (%d).\n", r); return r; } + + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio resume failed\n"); + return r; + } + return r; } @@ -1957,6 +1964,7 @@ int r600_suspend(struct radeon_device *rdev) { int r; + r600_audio_fini(rdev); /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); 
rdev->cp.ready = false; diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index b1c1d3433454..0dcb6904c4ff 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -261,7 +261,6 @@ void r600_audio_fini(struct radeon_device *rdev) if (!r600_audio_chipset_supported(rdev)) return; - WREG32_P(R600_AUDIO_ENABLE, 0x0, ~0x81000000); - del_timer(&rdev->audio_timer); + WREG32_P(R600_AUDIO_ENABLE, 0x0, ~0x81000000); } From 5ecaafdbf44b1ba400b746c60c401d54c7ee0863 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 5 Feb 2010 01:24:34 -0500 Subject: [PATCH 281/640] kprobes: Add mcount to the kprobes blacklist Since mcount function can be called from everywhere, it should be blacklisted. Moreover, the "mcount" symbol is a special symbol name. So, it is better to put it in the generic blacklist. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Ananth N Mavinakayanahalli Cc: Steven Rostedt LKML-Reference: <20100205062433.3745.36726.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c3340e836c37..ccec774c716d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -94,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = { {"native_get_debugreg",}, {"irq_entries_start",}, {"common_interrupt",}, + {"mcount",}, /* mcount can be called from everywhere */ {NULL} /* Terminator */ }; From 1eb6dc7dabcb4aa762d96f4f6978f3ef86321d68 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 4 Feb 2010 22:21:47 +0200 Subject: [PATCH 282/640] ALSA: hda - Delay switching to polling mode if an interrupt was missing My sound codec seems sometimes (very rarely) to omit interrupts (ALC268) However, interrupt mode still works. Thus if we get timeout, poll the codec once. If we get 3 such polls in a row, then switch to polling mode. 
This patch is maybe an bandaid, but this might be a workaround for hardware bug. Signed-off-by: Maxim Levitsky Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 565de38a3fc7..d853e2c33bb7 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -426,6 +426,7 @@ struct azx { /* flags */ int position_fix; + int poll_count; unsigned int running :1; unsigned int initialized :1; unsigned int single_cmd :1; @@ -506,7 +507,7 @@ static char *driver_short_names[] __devinitdata = { #define get_azx_dev(substream) (substream->runtime->private_data) static int azx_acquire_irq(struct azx *chip, int do_disconnect); - +static int azx_send_cmd(struct hda_bus *bus, unsigned int val); /* * Interface for HD codec */ @@ -664,11 +665,12 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, { struct azx *chip = bus->private_data; unsigned long timeout; + int do_poll = 0; again: timeout = jiffies + msecs_to_jiffies(1000); for (;;) { - if (chip->polling_mode) { + if (chip->polling_mode || do_poll) { spin_lock_irq(&chip->reg_lock); azx_update_rirb(chip); spin_unlock_irq(&chip->reg_lock); @@ -676,6 +678,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, if (!chip->rirb.cmds[addr]) { smp_rmb(); bus->rirb_error = 0; + + if (!do_poll) + chip->poll_count = 0; return chip->rirb.res[addr]; /* the last value */ } if (time_after(jiffies, timeout)) @@ -688,6 +693,16 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, } } + if (!chip->polling_mode && chip->poll_count < 2) { + snd_printdd(SFX "azx_get_response timeout, " + "polling the codec once: last cmd=0x%08x\n", + chip->last_cmd[addr]); + do_poll = 1; + chip->poll_count++; + goto again; + } + + if (!chip->polling_mode) { snd_printk(KERN_WARNING SFX "azx_get_response timeout, " "switching to polling mode: last cmd=0x%08x\n", From 
9492837a6f54b069e13e40e3c89898bb8837a386 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 4 Feb 2010 22:26:37 +0200 Subject: [PATCH 283/640] ALSA: cosmetic: make hda intel interrupt name consistent with others This renames the interrupt name in /proc/interrupt. HDA Intel -> hda_intel This also eliminates space from the name, probably helping some parsers. Don't think anybody depends on this name in userspace Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d853e2c33bb7..b8faa6dc5abe 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2058,7 +2058,7 @@ static int azx_acquire_irq(struct azx *chip, int do_disconnect) { if (request_irq(chip->pci->irq, azx_interrupt, chip->msi ? 0 : IRQF_SHARED, - "HDA Intel", chip)) { + "hda_intel", chip)) { printk(KERN_ERR "hda-intel: unable to grab IRQ %d, " "disabling device\n", chip->pci->irq); if (do_disconnect) From 9d4c7464458770d309169f7a7ce1ea6f8a4a7de5 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 5 Feb 2010 10:19:41 +0100 Subject: [PATCH 284/640] ALSA: ice1724 - aureon - fix wm8770 volume offset The volume register is from 0..0x7f and 0..0x1a range is mute. Also, fix mute combining in wm_vol_put(). The wrong behaviour was noticed by Peter Christensen. 
Signed-off-by: Jaroslav Kysela --- sound/pci/ice1712/aureon.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c index 765d7bd4c3d4..9e66f6d306f8 100644 --- a/sound/pci/ice1712/aureon.c +++ b/sound/pci/ice1712/aureon.c @@ -703,11 +703,13 @@ static void wm_set_vol(struct snd_ice1712 *ice, unsigned int index, unsigned sho { unsigned char nvol; - if ((master & WM_VOL_MUTE) || (vol & WM_VOL_MUTE)) + if ((master & WM_VOL_MUTE) || (vol & WM_VOL_MUTE)) { nvol = 0; - else + } else { nvol = ((vol % WM_VOL_CNT) * (master % WM_VOL_CNT)) / WM_VOL_MAX; + nvol += 0x1b; + } wm_put(ice, index, nvol); wm_put_nocache(ice, index, 0x180 | nvol); @@ -778,7 +780,7 @@ static int wm_master_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ for (ch = 0; ch < 2; ch++) { unsigned int vol = ucontrol->value.integer.value[ch]; if (vol > WM_VOL_MAX) - continue; + vol = WM_VOL_MAX; vol |= spec->master[ch] & WM_VOL_MUTE; if (vol != spec->master[ch]) { int dac; @@ -834,8 +836,8 @@ static int wm_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value * for (i = 0; i < voices; i++) { unsigned int vol = ucontrol->value.integer.value[i]; if (vol > WM_VOL_MAX) - continue; - vol |= spec->vol[ofs+i]; + vol = WM_VOL_MAX; + vol |= spec->vol[ofs+i] & WM_VOL_MUTE; if (vol != spec->vol[ofs+i]) { spec->vol[ofs+i] = vol; idx = WM_DAC_ATTEN + ofs + i; From ae54abed636d18f7939c965f21ad126001dbe34c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 5 Feb 2010 13:11:45 +0100 Subject: [PATCH 285/640] cfq-iosched: split seeky coop queues after one slice Currently we split seeky coop queues after 1s, which is too big. Below patch marks seeky coop queue split_coop flag after one slice. After that, if new requests come in, the queues will be splitted. Patch is suggested by Corrado. 
Signed-off-by: Shaohua Li Reviewed-by: Corrado Zoccolo Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 49 +++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 17b768d0d42f..023f4e69a337 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -42,16 +42,13 @@ static const int cfq_hist_divisor = 4; */ #define CFQ_MIN_TT (2) -/* - * Allow merged cfqqs to perform this amount of seeky I/O before - * deciding to break the queues up again. - */ -#define CFQQ_COOP_TOUT (HZ) - #define CFQ_SLICE_SCALE (5) #define CFQ_HW_QUEUE_MIN (5) #define CFQ_SERVICE_SHIFT 12 +#define CFQQ_SEEK_THR 8 * 1024 +#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) + #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) @@ -137,7 +134,6 @@ struct cfq_queue { u64 seek_total; sector_t seek_mean; sector_t last_request_pos; - unsigned long seeky_start; pid_t pid; @@ -314,6 +310,7 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ CFQ_CFQQ_FLAG_sync, /* synchronous queue */ CFQ_CFQQ_FLAG_coop, /* cfqq is shared */ + CFQ_CFQQ_FLAG_split_coop, /* shared cfqq will be splitted */ CFQ_CFQQ_FLAG_deep, /* sync cfqq experienced large depth */ CFQ_CFQQ_FLAG_wait_busy, /* Waiting for next request */ }; @@ -342,6 +339,7 @@ CFQ_CFQQ_FNS(prio_changed); CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); CFQ_CFQQ_FNS(coop); +CFQ_CFQQ_FNS(split_coop); CFQ_CFQQ_FNS(deep); CFQ_CFQQ_FNS(wait_busy); #undef CFQ_CFQQ_FNS @@ -1565,6 +1563,15 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_clear_cfqq_wait_request(cfqq); cfq_clear_cfqq_wait_busy(cfqq); + /* + * If this cfqq is shared between multiple processes, check to + * make sure that those processes are still issuing I/Os within + * the mean seek distance. 
If not, it may be time to break the + * queues apart again. + */ + if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq)) + cfq_mark_cfqq_split_coop(cfqq); + /* * store what was left of this slice, if the queue idled/timed out */ @@ -1663,9 +1670,6 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, return cfqd->last_position - blk_rq_pos(rq); } -#define CFQQ_SEEK_THR 8 * 1024 -#define CFQQ_SEEKY(cfqq) ((cfqq)->seek_mean > CFQQ_SEEK_THR) - static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct request *rq, bool for_preempt) { @@ -3000,19 +3004,6 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, total = cfqq->seek_total + (cfqq->seek_samples/2); do_div(total, cfqq->seek_samples); cfqq->seek_mean = (sector_t)total; - - /* - * If this cfqq is shared between multiple processes, check to - * make sure that those processes are still issuing I/Os within - * the mean seek distance. If not, it may be time to break the - * queues apart again. - */ - if (cfq_cfqq_coop(cfqq)) { - if (CFQQ_SEEKY(cfqq) && !cfqq->seeky_start) - cfqq->seeky_start = jiffies; - else if (!CFQQ_SEEKY(cfqq)) - cfqq->seeky_start = 0; - } } /* @@ -3453,14 +3444,6 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic, return cic_to_cfqq(cic, 1); } -static int should_split_cfqq(struct cfq_queue *cfqq) -{ - if (cfqq->seeky_start && - time_after(jiffies, cfqq->seeky_start + CFQQ_COOP_TOUT)) - return 1; - return 0; -} - /* * Returns NULL if a new cfqq should be allocated, or the old cfqq if this * was the last process referring to said cfqq. @@ -3469,9 +3452,9 @@ static struct cfq_queue * split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) { if (cfqq_process_refs(cfqq) == 1) { - cfqq->seeky_start = 0; cfqq->pid = current->pid; cfq_clear_cfqq_coop(cfqq); + cfq_clear_cfqq_split_coop(cfqq); return cfqq; } @@ -3510,7 +3493,7 @@ new_queue: /* * If the queue was seeky for too long, break it apart. 
*/ - if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) { + if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) { cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq"); cfqq = split_cfqq(cic, cfqq); if (!cfqq) From 531c2dc70d339c5dfa8c3eb628c3459dc6f3a075 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Fri, 5 Feb 2010 13:14:04 +0100 Subject: [PATCH 286/640] cciss: Make cciss_seq_show handle holes in the h->drv[] array It is possible (and expected) for there to be holes in the h->drv[] array, that is, some elements may be NULL pointers. cciss_seq_show needs to be made aware of this possibility to avoid an Oops. To reproduce the Oops which this fixes: 1) Create two "arrays" in the Array Configuration Utility and several logical drives on each array. 2) cat /proc/driver/cciss/cciss* in an infinite loop 3) delete some of the logical drives in the first "array." Signed-off-by: Stephen M. Cameron Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 873e594860d3..9291614ac6b7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -337,6 +337,9 @@ static int cciss_seq_show(struct seq_file *seq, void *v) if (*pos > h->highest_lun) return 0; + if (drv == NULL) /* it's possible for h->drv[] to have holes. */ + return 0; + if (drv->heads == 0) return 0; From 3b9447fb7fa1829731290e64ef928d4f6461310a Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Fri, 5 Feb 2010 00:55:33 +0200 Subject: [PATCH 287/640] ASoC: pandora: Add APLL supply to fix audio output Pandora's external DAC is using 256*Fs output from the TWL4030 codec, and TWL4030 needs to have APLL enabled for its 256*Fs output to function.
Signed-off-by: Grazvydas Ignotas Acked-by: Peter Ujfalusi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/omap/omap3pandora.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c index 71b2c161158d..68980c19a3bc 100644 --- a/sound/soc/omap/omap3pandora.c +++ b/sound/soc/omap/omap3pandora.c @@ -145,6 +145,7 @@ static const struct snd_soc_dapm_widget omap3pandora_in_dapm_widgets[] = { }; static const struct snd_soc_dapm_route omap3pandora_out_map[] = { + {"PCM DAC", NULL, "APLL Enable"}, {"Headphone Amplifier", NULL, "PCM DAC"}, {"Line Out", NULL, "PCM DAC"}, {"Headphone Jack", NULL, "Headphone Amplifier"}, From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 5 Feb 2010 16:09:11 +1100 Subject: [PATCH 288/640] percpu: add __percpu for sparse This is to make the annotation of percpu variables during the next merge window less painful. Extracted from a patch by Rusty Russell. Signed-off-by: Stephen Rothwell Acked-by: Tejun Heo Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5be3dab4a695..188fcae10a99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ?
({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ From 73d2eaac8a3f1ec1d6d0a80ea7302a439ca9b933 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Fri, 5 Feb 2010 01:42:43 -0500 Subject: [PATCH 289/640] CS5536: apply pci quirk for BIOS SMBUS bug The new cs5535-* drivers use PCI header config info rather than MSRs to determine the memory region to use for things like GPIOs and MFGPTs. As anticipated, we've run into a buggy BIOS: [ 0.081818] pci 0000:00:14.0: reg 10: [io 0x6000-0x7fff] [ 0.081906] pci 0000:00:14.0: reg 14: [io 0x6100-0x61ff] [ 0.082015] pci 0000:00:14.0: reg 18: [io 0x6200-0x63ff] [ 0.082917] pci 0000:00:14.2: reg 20: [io 0xe000-0xe00f] [ 0.083551] pci 0000:00:15.0: reg 10: [mem 0xa0010000-0xa0010fff] [ 0.084436] pci 0000:00:15.1: reg 10: [mem 0xa0011000-0xa0011fff] [ 0.088816] PCI: pci_cache_line_size set to 32 bytes [ 0.088938] pci 0000:00:14.0: address space collision: [io 0x6100-0x61ff] already in use [ 0.089052] pci 0000:00:14.0: can't reserve [io 0x6100-0x61ff] This is a Soekris board, and its BIOS sets the size of the PCI ISA bridge device's BAR0 to 8k. In reality, it should be 8 bytes (BAR0 is used for SMBus stuff). This quirk checks for an incorrect size, and resets it accordingly. 
Signed-off-by: Andres Salomon Tested-by: Leigh Porter Tested-by: Jens Rottmann Signed-off-by: Linus Torvalds --- drivers/pci/quirks.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c74694345b6e..d58b94030ef3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -338,6 +338,23 @@ static void __devinit quirk_s3_64M(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M); +/* + * Some CS5536 BIOSes (for example, the Soekris NET5501 board w/ comBIOS + * ver. 1.33 20070103) don't set the correct ISA PCI region header info. + * BAR0 should be 8 bytes; instead, it may be set to something like 8k + * (which conflicts w/ BAR1's memory range). + */ +static void __devinit quirk_cs5536_vsa(struct pci_dev *dev) +{ + if (pci_resource_len(dev, 0) != 8) { + struct resource *res = &dev->resource[0]; + res->end = res->start + 8 - 1; + dev_info(&dev->dev, "CS5536 ISA bridge bug detected " + "(incorrect header); workaround applied.\n"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, quirk_cs5536_vsa); + static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr, const char *name) { From 1c010ff8912cbc08d80e865aab9c32b6b00c527d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 5 Feb 2010 17:48:13 +0100 Subject: [PATCH 290/640] i2c-tiny-usb: Fix on big-endian systems The functionality bit vector is always returned as a little-endian 32-bit number by the device, so it must be byte-swapped to the host endianness. On the other hand, the delay value is handled by the USB stack, so no byte swapping is needed on our side. 
This fixes bug #15105: http://bugzilla.kernel.org/show_bug.cgi?id=15105 Reported-by: Jens Richter Signed-off-by: Jean Delvare Tested-by: Jens Richter Cc: Till Harbaum Cc: stable@kernel.org --- drivers/i2c/busses/i2c-tiny-usb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index b1c050ff311d..e29b6d5ba8ef 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -13,6 +13,7 @@ #include #include #include +#include /* include interfaces to usb layer */ #include @@ -31,8 +32,8 @@ #define CMD_I2C_IO_END (1<<1) /* i2c bit delay, default is 10us -> 100kHz */ -static int delay = 10; -module_param(delay, int, 0); +static unsigned short delay = 10; +module_param(delay, ushort, 0); MODULE_PARM_DESC(delay, "bit delay in microseconds, " "e.g. 10 for 100kHz (default is 100kHz)"); @@ -109,7 +110,7 @@ static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) static u32 usb_func(struct i2c_adapter *adapter) { - u32 func; + __le32 func; /* get functionality from adapter */ if (usb_read(adapter, CMD_GET_FUNC, 0, 0, &func, sizeof(func)) != @@ -118,7 +119,7 @@ static u32 usb_func(struct i2c_adapter *adapter) return 0; } - return func; + return le32_to_cpu(func); } /* This is the actual algorithm we define */ @@ -216,8 +217,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface, "i2c-tiny-usb at bus %03d device %03d", dev->usb_dev->bus->busnum, dev->usb_dev->devnum); - if (usb_write(&dev->adapter, CMD_SET_DELAY, - cpu_to_le16(delay), 0, NULL, 0) != 0) { + if (usb_write(&dev->adapter, CMD_SET_DELAY, delay, 0, NULL, 0) != 0) { dev_err(&dev->adapter.dev, "failure setting delay to %dus\n", delay); retval = -EIO; From 15c697ce1c5b408c5e20dcdc6aea2968d1125b75 Mon Sep 17 00:00:00 2001 From: Michael Poole Date: Fri, 5 Feb 2010 12:23:43 -0500 Subject: [PATCH 291/640] Bluetooth: Keep a copy of each HID device's report descriptor 
The report descriptor is read by user space (via the Service Discovery Protocol), so it is only available during the ioctl to connect. However, the HID probe function that needs the descriptor might not be called until a specific module is loaded. Keep a copy of the descriptor so it is available for later use. Signed-off-by: Michael Poole Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 49 +++++++++++++++++++-------------------- net/bluetooth/hidp/hidp.h | 4 +++- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e21..fc6ec1e72652 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -703,29 +703,9 @@ static void hidp_close(struct hid_device *hid) static int hidp_parse(struct hid_device *hid) { struct hidp_session *session = hid->driver_data; - struct hidp_connadd_req *req = session->req; - unsigned char *buf; - int ret; - buf = kmalloc(req->rd_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - return -EFAULT; - } - - ret = hid_parse_report(session->hid, buf, req->rd_size); - - kfree(buf); - - if (ret) - return ret; - - session->req = NULL; - - return 0; + return hid_parse_report(session->hid, session->rd_data, + session->rd_size); } static int hidp_start(struct hid_device *hid) @@ -770,12 +750,24 @@ static int hidp_setup_hid(struct hidp_session *session, bdaddr_t src, dst; int err; + session->rd_data = kzalloc(req->rd_size, GFP_KERNEL); + if (!session->rd_data) + return -ENOMEM; + + if (copy_from_user(session->rd_data, req->rd_data, req->rd_size)) { + err = -EFAULT; + goto fault; + } + session->rd_size = req->rd_size; + hid = hid_allocate_device(); - if (IS_ERR(hid)) - return PTR_ERR(hid); + if (IS_ERR(hid)) { + err = PTR_ERR(hid); + goto fault; + } session->hid = hid; - session->req = req; + hid->driver_data = session; baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); @@ 
-806,6 +798,10 @@ failed: hid_destroy_device(hid); session->hid = NULL; +fault: + kfree(session->rd_data); + session->rd_data = NULL; + return err; } @@ -900,6 +896,9 @@ unlink: session->hid = NULL; } + kfree(session->rd_data); + session->rd_data = NULL; + purge: skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index faf3d74c3586..a4e215d50c10 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -154,7 +154,9 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; - struct hidp_connadd_req *req; + /* Report descriptor */ + __u8 *rd_data; + uint rd_size; }; static inline void hidp_schedule(struct hidp_session *session) From 85f8d3e5faea8bd36c3e5196f8334f7db45e19b2 Mon Sep 17 00:00:00 2001 From: Ray Copeland Date: Fri, 5 Feb 2010 19:58:35 +0100 Subject: [PATCH 292/640] hwmon: (adt7462) Wrong ADT7462_VOLT_COUNT The #define ADT7462_VOLT_COUNT is wrong, it should be 13 not 12. All the for loops that use this as a limit count are of the typical form, "for (n = 0; n < ADT7462_VOLT_COUNT; n++)", so to loop through all voltages w/o missing the last one it is necessary for the count to be one greater than it is. (Specifically, you will miss the +1.5V 3GPIO input with count = 12 vs. 13.) Signed-off-by: Ray Copeland Acked-by: "Darrick J. Wong" Signed-off-by: Jean Delvare Cc: stable@kernel.org --- drivers/hwmon/adt7462.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index a31e77c776ae..b8156b4893bb 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -179,7 +179,7 @@ static const unsigned short normal_i2c[] = { 0x58, 0x5C, I2C_CLIENT_END }; * * Some, but not all, of these voltages have low/high limits. 
*/ -#define ADT7462_VOLT_COUNT 12 +#define ADT7462_VOLT_COUNT 13 #define ADT7462_VENDOR 0x41 #define ADT7462_DEVICE 0x62 From 197027e6ef830d60e10f76efc8d12bf3b6c35db5 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 5 Feb 2010 19:58:36 +0100 Subject: [PATCH 293/640] hwmon: (lm78) Request I/O ports individually for probing Different motherboards have different PNP declarations for LM78/LM79 chips. Some declare the whole range of I/O ports (8 ports), some declare only the useful ports (2 ports at offset 5) and some declare fancy ranges, for example 4 ports at offset 4. To properly handle all cases, request all ports individually for probing. After we have determined that we really have an LM78 or LM79 chip, the useful port range will be requested again, as a single block. This fixes the driver on the Olivetti M3000 DT 540, at least. Signed-off-by: Jean Delvare Cc: stable@kernel.org --- drivers/hwmon/lm78.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index cadcbd90ff3b..72ff2c4e757d 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -851,17 +851,16 @@ static struct lm78_data *lm78_update_device(struct device *dev) static int __init lm78_isa_found(unsigned short address) { int val, save, found = 0; + int port; - /* We have to request the region in two parts because some - boards declare base+4 to base+7 as a PNP device */ - if (!request_region(address, 4, "lm78")) { - pr_debug("lm78: Failed to request low part of region\n"); - return 0; - } - if (!request_region(address + 4, 4, "lm78")) { - pr_debug("lm78: Failed to request high part of region\n"); - release_region(address, 4); - return 0; + /* Some boards declare base+0 to base+7 as a PNP device, some base+4 + * to base+7 and some base+5 to base+6. So we better request each port + * individually for the probing phase. 
*/ + for (port = address; port < address + LM78_EXTENT; port++) { + if (!request_region(port, 1, "lm78")) { + pr_debug("lm78: Failed to request port 0x%x\n", port); + goto release; + } } #define REALLY_SLOW_IO @@ -925,8 +924,8 @@ static int __init lm78_isa_found(unsigned short address) val & 0x80 ? "LM79" : "LM78", (int)address); release: - release_region(address + 4, 4); - release_region(address, 4); + for (port--; port >= address; port--) + release_region(port, 1); return found; } From b0bcdd3cd0adb85a7686b396ba50493871b1135c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 5 Feb 2010 19:58:36 +0100 Subject: [PATCH 294/640] hwmon: (w83781d) Request I/O ports individually for probing Different motherboards have different PNP declarations for W83781D/W83782D chips. Some declare the whole range of I/O ports (8 ports), some declare only the useful ports (2 ports at offset 5) and some declare fancy ranges, for example 4 ports at offset 4. To properly handle all cases, request all ports individually for probing. After we have determined that we really have a W83781D or W83782D chip, the useful port range will be requested again, as a single block. I did not see a board which needs this yet, but I know of one for lm78 driver and I'd like to keep the logic of these two drivers in sync. 
Signed-off-by: Jean Delvare Cc: stable@kernel.org --- drivers/hwmon/w83781d.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index 05f9225b6f94..32d4adee73db 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -1793,17 +1793,17 @@ static int __init w83781d_isa_found(unsigned short address) { int val, save, found = 0; + int port; - /* We have to request the region in two parts because some - boards declare base+4 to base+7 as a PNP device */ - if (!request_region(address, 4, "w83781d")) { - pr_debug("w83781d: Failed to request low part of region\n"); - return 0; - } - if (!request_region(address + 4, 4, "w83781d")) { - pr_debug("w83781d: Failed to request high part of region\n"); - release_region(address, 4); - return 0; + /* Some boards declare base+0 to base+7 as a PNP device, some base+4 + * to base+7 and some base+5 to base+6. So we better request each port + * individually for the probing phase. */ + for (port = address; port < address + W83781D_EXTENT; port++) { + if (!request_region(port, 1, "w83781d")) { + pr_debug("w83781d: Failed to request port 0x%x\n", + port); + goto release; + } } #define REALLY_SLOW_IO @@ -1877,8 +1877,8 @@ w83781d_isa_found(unsigned short address) val == 0x30 ? 
"W83782D" : "W83781D", (int)address); release: - release_region(address + 4, 4); - release_region(address, 4); + for (port--; port >= address; port--) + release_region(port, 1); return found; } From b953df7c70740cd7593072ebec77a8f658505630 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Fri, 5 Feb 2010 21:52:37 +0800 Subject: [PATCH 295/640] dmaengine: correct onstack wait_queue_head declaration Use DECLARE_WAIT_QUEUE_HEAD_ONSTACK to make lockdep happy Signed-off-by: Yong Zhang Cc: Maciej Sosnowski Cc: Andrew Morton Cc: Nicolas Ferre Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 8b905161fbf4..948d563941c9 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -467,7 +467,7 @@ err_srcs: if (iterations > 0) while (!kthread_should_stop()) { - DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit); interruptible_sleep_on(&wait_dmatest_exit); } From bd6b0bf87d8cf3d9cfeadeb12dbf5449e3e50765 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 5 Feb 2010 10:26:27 +0100 Subject: [PATCH 296/640] ocfs2: Fix contiguousness check in ocfs2_try_to_merge_extent_map() The wrong member was compared in the contiguousness check.
Acked-by: Tao Ma Signed-off-by: Roel Kluin Signed-off-by: Joel Becker --- fs/ocfs2/extent_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index d35a27f4523e..5328529e7fd2 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -192,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, emi->ei_clusters += ins->ei_clusters; return 1; } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && - (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys && + (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && ins->ei_flags == emi->ei_flags) { emi->ei_phys = ins->ei_phys; emi->ei_cpos = ins->ei_cpos; From f12f98dba6ea1517cd7fbb912208893b9c014c15 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Feb 2010 13:14:00 -0500 Subject: [PATCH 297/640] cifs: fix length calculation for converted unicode readdir names cifs_from_ucs2 returns the length of the converted name, including the length of the NULL terminator. We don't want to include the NULL terminator in the dentry name length however since that'll throw off the hash calculation for the dentry cache. I believe that this is the root cause of several problems that have cropped up recently that seem to be papered over with the "noserverino" mount option. More confirmation of that would be good, but this is clearly a bug and it fixes at least one reproducible problem that was reported. 
This patch fixes at least this reproducer in this kernel.org bug: http://bugzilla.kernel.org/show_bug.cgi?id=15088#c12 Reported-by: Bjorn Tore Sund Acked-by: Dave Kleikamp Signed-off-by: Jeff Layton Cc: stable@kernel.org Signed-off-by: Steve French --- fs/cifs/readdir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f84062f9a985..f5618f8cc462 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -666,6 +666,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, min(len, max_len), nlt, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + pqst->len -= nls_nullsize(nlt); } else { pqst->name = filename; pqst->len = len; From 301a6a317797ca362951ea21da397c05236f0070 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 6 Feb 2010 07:08:53 +0000 Subject: [PATCH 298/640] [CIFS] Maximum username length check in session setup does not match Fix length check reported by D. Binderman (see below) d binderman wrote: > > I just ran the sourceforge tool cppcheck over the source code of the > new Linux kernel 2.6.33-rc6 > > It said > > [./cifs/sess.c:250]: (error) Buffer access out-of-bounds May turn out to be harmless, but best to be safe. Note max username length is defined to 32 due to Linux (Windows maximum is 20). 
Signed-off-by: Steve French --- fs/cifs/sess.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7085a6275c4c..aaa9c1c5a5bd 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, /* null user mount */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; - } else { /* 300 should be long enough for any conceivable user name */ + } else { bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, - 300, nls_cp); + MAX_USERNAME_SIZE, nls_cp); } bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ @@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, /* copy user */ if (ses->userName == NULL) { /* BB what about null user mounts - check that we do this BB */ - } else { /* 300 should be long enough for any conceivable user name */ - strncpy(bcc_ptr, ses->userName, 300); + } else { + strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE); } - /* BB improve check for overflow */ - bcc_ptr += strnlen(ses->userName, 300); + bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE); *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ From 6f5a55f1a6c5abee15a0e878e5c74d9f1569b8b0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Feb 2010 16:16:50 -0800 Subject: [PATCH 299/640] Fix potential crash with sys_move_pages We incorrectly depended on the 'node_state/node_isset()' functions testing the node range, rather than checking it explicitly. That's not reliable, even if it might often happen to work. So do the proper explicit test. 
Reported-by: Marcus Meissner Acked-and-tested-by: Brice Goglin Acked-by: Hugh Dickins Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index efddbf0926b2..9a0db5bbabe4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, goto out_pm; err = -ENODEV; + if (node < 0 || node >= MAX_NUMNODES) + goto out_pm; + if (!node_state(node, N_HIGH_MEMORY)) goto out_pm; From 29275254caedfedce960cfe6df24b90cb04fe431 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Feb 2010 14:17:12 -0800 Subject: [PATCH 300/640] Linux 2.6.33-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 394aec712c7d..f8e02e9491d0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 33 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* From 8dd5ca532c2d2c2b85f16bc038ebfff05b8853e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Jan 2010 22:11:38 -0500 Subject: [PATCH 301/640] befs: fix leak Signed-off-by: Al Viro --- fs/befs/linuxvfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 33baf27fac78..34ddda888e63 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -873,6 +873,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); unacquire_priv_sbp: + kfree(befs_sb->mount_opts.iocharset); kfree(sb->s_fs_info); unacquire_none: From 4b06e5b9ad8abb20105b2b25e42c509ebe9b2d76 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Fri, 29 Jan 2010 09:56:22 +0900 Subject: [PATCH 302/640] freeze_bdev: don't deactivate successfully frozen MS_RDONLY sb Thanks Thomas and Christoph for testing and review. 
I removed 'smp_wmb()' before up_write from the previous patch, since up_write() should have necessary ordering constraints. (I.e. the change of s_frozen is visible to others after up_write) I'm quite sure the change is harmless but if you are uncomfortable with Tested-by/Reviewed-by on the modified patch, please remove them. If MS_RDONLY, freeze_bdev should just up_write(s_umount) instead of deactivate_locked_super(). Also, keep sb->s_frozen consistent so that remount can check the frozen state. Otherwise a crash reported here can happen: http://lkml.org/lkml/2010/1/16/37 http://lkml.org/lkml/2010/1/28/53 This patch should be applied for 2.6.32 stable series, too. Reviewed-by: Christoph Hellwig Tested-by: Thomas Backlund Signed-off-by: Jun'ichi Nomura Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/block_dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 73d6a735b8f3..d11d0289f3d2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -246,7 +246,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) if (!sb) goto out; if (sb->s_flags & MS_RDONLY) { - deactivate_locked_super(sb); + sb->s_frozen = SB_FREEZE_TRANS; + up_write(&sb->s_umount); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } @@ -307,7 +308,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) BUG_ON(sb->s_bdev != bdev); down_write(&sb->s_umount); if (sb->s_flags & MS_RDONLY) - goto out_deactivate; + goto out_unfrozen; if (sb->s_op->unfreeze_fs) { error = sb->s_op->unfreeze_fs(sb); @@ -321,11 +322,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) } } +out_unfrozen: sb->s_frozen = SB_UNFROZEN; smp_wmb(); wake_up(&sb->s_wait_unfrozen); -out_deactivate: if (sb) deactivate_locked_super(sb); out_unlock: From 1e41568d7378d1ba8c64ba137b9ddd00b59f893a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jan 2010 05:43:08 -0500 Subject: [PATCH 303/640] Take ima_path_check() in nfsd past 
dentry_open() in nfsd_open() Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c194793b642b..325959e264ce 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,6 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); + host_err = ima_path_check(&(*filp)->f_path, + access & (MAY_READ | MAY_WRITE | MAY_EXEC)); out_nfserr: err = nfserrno(host_err); out: @@ -2127,7 +2129,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ path.mnt = exp->ex_path.mnt; path.dentry = dentry; - err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC)); nfsd_out: return err? nfserrno(err) : 0; } From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 20 Jan 2010 15:35:41 -0500 Subject: [PATCH 304/640] fix ima breakage The "Untangling ima mess, part 2 with counters" patch messed up the counters. Based on conversations with Al Viro, this patch streamlines ima_path_check() by removing the counter maintenance. The counters are now updated independently, from measuring the file, in __dentry_open() and alloc_file() by calling ima_counts_get(). ima_path_check() is called from nfsd and do_filp_open(). It also did not measure all files that should have been measured. Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa] [AV: add missing nfsd bits] Signed-off-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 6 +- fs/nfsd/vfs.c | 3 +- include/linux/ima.h | 4 +- security/integrity/ima/ima_main.c | 236 ++++++++++++------------------ 4 files changed, 97 insertions(+), 152 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 94a5e60779f9..cd77b6375efd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,8 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1797,8 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(&filp->f_path, filp->f_mode & - (MAY_READ | MAY_WRITE | MAY_EXEC)); + error = ima_path_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 325959e264ce..32477e3a645c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(&(*filp)->f_path, - access & (MAY_READ | MAY_WRITE | MAY_EXEC)); + host_err = ima_path_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index 99dc6d5cf7e5..aa55a8f1f5b9 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); 
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index a89f44d5e030..75aee18f6163 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -84,6 +84,36 @@ out: return found; } +/* ima_read_write_check - reflect possible reading/writing errors in the PCR. + * + * When opening a file for read, if the file is already open for write, + * the file could change, resulting in a file measurement error. + * + * Opening a file for write, if the file is already open for read, results + * in a time of measure, time of use (ToMToU) error. + * + * In either case invalidate the PCR. + */ +enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; +static void ima_read_write_check(enum iint_pcr_error error, + struct ima_iint_cache *iint, + struct inode *inode, + const unsigned char *filename) +{ + switch (error) { + case TOMTOU: + if (iint->readcount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "ToMToU"); + break; + case OPEN_WRITERS: + if (iint->writecount > 0) + ima_add_violation(inode, filename, "invalid_pcr", + "open_writers"); + break; + } +} + /* * Update the counts given an fmode_t */ @@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode) iint->writecount++; } +/* + * ima_counts_get - increment file counts + * + * Maintain read/write counters for all files, but only + * invalidate the PCR for measured files: + * - Opening a file for write when already open for read, + * results in a time of measure, time of use (ToMToU) error. + * - Opening a file for read when already open for write, + * could result in a file measurement error. 
+ * + */ +void ima_counts_get(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + fmode_t mode = file->f_mode; + struct ima_iint_cache *iint; + int rc; + + if (!ima_initialized || !S_ISREG(inode->i_mode)) + return; + iint = ima_iint_find_get(inode); + if (!iint) + return; + mutex_lock(&iint->mutex); + rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); + if (rc < 0) + goto out; + + if (mode & FMODE_WRITE) { + ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name); + goto out; + } + ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name); +out: + ima_inc_counts(iint, file->f_mode); + mutex_unlock(&iint->mutex); + + kref_put(&iint->refcount, iint_free); +} + /* * Decrement ima counts */ @@ -153,123 +224,6 @@ void ima_file_free(struct file *file) kref_put(&iint->refcount, iint_free); } -/* ima_read_write_check - reflect possible reading/writing errors in the PCR. - * - * When opening a file for read, if the file is already open for write, - * the file could change, resulting in a file measurement error. - * - * Opening a file for write, if the file is already open for read, results - * in a time of measure, time of use (ToMToU) error. - * - * In either case invalidate the PCR. 
- */ -enum iint_pcr_error { TOMTOU, OPEN_WRITERS }; -static void ima_read_write_check(enum iint_pcr_error error, - struct ima_iint_cache *iint, - struct inode *inode, - const unsigned char *filename) -{ - switch (error) { - case TOMTOU: - if (iint->readcount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "ToMToU"); - break; - case OPEN_WRITERS: - if (iint->writecount > 0) - ima_add_violation(inode, filename, "invalid_pcr", - "open_writers"); - break; - } -} - -static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, - const unsigned char *filename) -{ - int rc = 0; - - ima_inc_counts(iint, file->f_mode); - - rc = ima_collect_measurement(iint, file); - if (!rc) - ima_store_measurement(iint, file, filename); - return rc; -} - -/** - * ima_path_check - based on policy, collect/store measurement. - * @path: contains a pointer to the path to be measured - * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE - * - * Measure the file being open for readonly, based on the - * ima_must_measure() policy decision. - * - * Keep read/write counters for all files, but only - * invalidate the PCR for measured files: - * - Opening a file for write when already open for read, - * results in a time of measure, time of use (ToMToU) error. - * - Opening a file for read when already open for write, - * could result in a file measurement error. - * - * Always return 0 and audit dentry_open failures. - * (Return code will be based upon measurement appraisal.) 
- */ -int ima_path_check(struct path *path, int mask) -{ - struct inode *inode = path->dentry->d_inode; - struct ima_iint_cache *iint; - struct file *file = NULL; - int rc; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return 0; - iint = ima_iint_find_get(inode); - if (!iint) - return 0; - - mutex_lock(&iint->mutex); - - rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); - if (rc < 0) - goto out; - - if ((mask & MAY_WRITE) || (mask == 0)) - ima_read_write_check(TOMTOU, iint, inode, - path->dentry->d_name.name); - - if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ) - goto out; - - ima_read_write_check(OPEN_WRITERS, iint, inode, - path->dentry->d_name.name); - if (!(iint->flags & IMA_MEASURED)) { - struct dentry *dentry = dget(path->dentry); - struct vfsmount *mnt = mntget(path->mnt); - - file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, - current_cred()); - if (IS_ERR(file)) { - int audit_info = 0; - - integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, - dentry->d_name.name, - "add_measurement", - "dentry_open failed", - 1, audit_info); - file = NULL; - goto out; - } - rc = get_path_measurement(iint, file, dentry->d_name.name); - } -out: - mutex_unlock(&iint->mutex); - if (file) - fput(file); - kref_put(&iint->refcount, iint_free); - return 0; -} -EXPORT_SYMBOL_GPL(ima_path_check); - static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) { @@ -297,33 +251,6 @@ out: return rc; } -/* - * ima_counts_get - increment file counts - * - * - for IPC shm and shmat file. - * - for nfsd exported files. - * - * Increment the counts for these files to prevent unnecessary - * imbalance messages. 
- */ -void ima_counts_get(struct file *file) -{ - struct inode *inode = file->f_dentry->d_inode; - struct ima_iint_cache *iint; - - if (!ima_initialized || !S_ISREG(inode->i_mode)) - return; - iint = ima_iint_find_get(inode); - if (!iint) - return; - mutex_lock(&iint->mutex); - ima_inc_counts(iint, file->f_mode); - mutex_unlock(&iint->mutex); - - kref_put(&iint->refcount, iint_free); -} -EXPORT_SYMBOL_GPL(ima_counts_get); - /** * ima_file_mmap - based on policy, collect/store measurement. * @file: pointer to the file to be measured (May be NULL) @@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm) return 0; } +/** + * ima_path_check - based on policy, collect/store measurement. + * @file: pointer to the file to be measured + * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE + * + * Measure files based on the ima_must_measure() policy decision. + * + * Always return 0 and audit dentry_open failures. + * (Return code will be based upon measurement appraisal.) + */ +int ima_path_check(struct file *file, int mask) +{ + int rc; + + rc = process_measurement(file, file->f_dentry->d_name.name, + mask & (MAY_READ | MAY_WRITE | MAY_EXEC), + PATH_CHECK); + return 0; +} +EXPORT_SYMBOL_GPL(ima_path_check); + static int __init init_ima(void) { int error; From 54bb6552bd9405dc7685653157a4ec260c77a71c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 9 Dec 2009 15:29:01 -0500 Subject: [PATCH 305/640] ima: initialize ima before inodes can be allocated ima wants to create an inode information struct (iint) when inodes are allocated. This means that at least the part of ima which does this allocation (the allocation is filled with information later) should be initialized before any inodes are created. To accomplish this we split the ima initialization routine placing the kmem cache allocator inside a security_initcall() function. Since this makes use of radix trees we also need to make sure that is initialized before security_initcall().
Signed-off-by: Eric Paris Acked-by: Mimi Zohar Signed-off-by: Al Viro --- init/main.c | 2 +- security/integrity/ima/ima.h | 1 - security/integrity/ima/ima_iint.c | 9 +++------ security/integrity/ima/ima_main.c | 1 - 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/init/main.c b/init/main.c index dac44a9356a5..4cb47a159f02 100644 --- a/init/main.c +++ b/init/main.c @@ -657,9 +657,9 @@ asmlinkage void __init start_kernel(void) proc_caches_init(); buffer_init(); key_init(); + radix_tree_init(); security_init(); vfs_caches_init(totalram_pages); - radix_tree_init(); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index c41afe6639a0..aa25a7eb2d0e 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -65,7 +65,6 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode, const char *cause, int result, int info); /* Internal IMA function definitions */ -void ima_iintcache_init(void); int ima_init(void); void ima_cleanup(void); int ima_fs_init(void); diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c index fa592ff1ac1c..0d83edcfc402 100644 --- a/security/integrity/ima/ima_iint.c +++ b/security/integrity/ima/ima_iint.c @@ -52,9 +52,6 @@ int ima_inode_alloc(struct inode *inode) struct ima_iint_cache *iint = NULL; int rc = 0; - if (!ima_initialized) - return 0; - iint = kmem_cache_alloc(iint_cache, GFP_NOFS); if (!iint) return -ENOMEM; @@ -118,8 +115,6 @@ void ima_inode_free(struct inode *inode) { struct ima_iint_cache *iint; - if (!ima_initialized) - return; spin_lock(&ima_iint_lock); iint = radix_tree_delete(&ima_iint_store, (unsigned long)inode); spin_unlock(&ima_iint_lock); @@ -141,9 +136,11 @@ static void init_once(void *foo) kref_set(&iint->refcount, 1); } -void __init ima_iintcache_init(void) +static int __init ima_iintcache_init(void) { iint_cache = kmem_cache_create("iint_cache", 
sizeof(struct ima_iint_cache), 0, SLAB_PANIC, init_once); + return 0; } +security_initcall(ima_iintcache_init); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 75aee18f6163..eb1cf6498cc9 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -321,7 +321,6 @@ static int __init init_ima(void) { int error; - ima_iintcache_init(); error = ima_init(); ima_initialized = 1; return error; From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 26 Jan 2010 17:02:40 -0500 Subject: [PATCH 306/640] ima: rename ima_path_check to ima_file_check ima_path_check actually deals with files! call it ima_file_check instead. Signed-off-by: Eric Paris Acked-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima_main.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index cd77b6375efd..d62fdc875f22 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1736,7 +1736,7 @@ do_last: if (nd.root.mnt) path_put(&nd.root); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); @@ -1796,7 +1796,7 @@ ok: } filp = nameidata_to_filp(&nd); if (!IS_ERR(filp)) { - error = ima_path_check(filp, acc_mode); + error = ima_file_check(filp, acc_mode); if (error) { fput(filp); filp = ERR_PTR(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 32477e3a645c..97d79eff6b7f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_path_check(*filp, access); + host_err = ima_file_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: diff --git a/include/linux/ima.h b/include/linux/ima.h index 
aa55a8f1f5b9..975837e7d6c0 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -17,7 +17,7 @@ struct linux_binprm; extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); @@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask) { return 0; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index eb1cf6498cc9..b76e1f03ea2b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -14,7 +14,7 @@ * * File: ima_main.c * implements the IMA hooks: ima_bprm_check, ima_file_mmap, - * and ima_path_check. + * and ima_file_check. */ #include #include @@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm) * Always return 0 and audit dentry_open failures. * (Return code will be based upon measurement appraisal.) */ -int ima_path_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask) { int rc; @@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask) PATH_CHECK); return 0; } -EXPORT_SYMBOL_GPL(ima_path_check); +EXPORT_SYMBOL_GPL(ima_file_check); static int __init init_ima(void) { From 1e93d0052d9a6b3d0b382eedceb18b519d603baf Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 26 Jan 2010 17:02:41 -0500 Subject: [PATCH 307/640] ima: rename PATH_CHECK to FILE_CHECK With the movement of the ima hooks functions were renamed from *path* to *file* since they always deal with struct file. 
This patch renames some of the ima internal flags to make them consistent with the rest of the code. Signed-off-by: Mimi Zohar Signed-off-by: Eric Paris Signed-off-by: Al Viro --- Documentation/ABI/testing/ima_policy | 12 ++++++------ security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_api.c | 4 ++-- security/integrity/ima/ima_main.c | 4 ++-- security/integrity/ima/ima_policy.c | 9 ++++++--- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 6434f0df012e..6cd6daefaaed 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -20,7 +20,7 @@ Description: lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] - base: func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION] + base: func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK] mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] fsmagic:= hex value uid:= decimal value @@ -40,11 +40,11 @@ Description: measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC - measure func=INODE_PERM mask=MAY_READ uid=0 + measure func=FILE_CHECK mask=MAY_READ uid=0 The default policy measures all executables in bprm_check, all files mmapped executable in file_mmap, and all files - open for read by root in inode_permission. + open for read by root in do_filp_open. 
Examples of LSM specific definitions: @@ -54,8 +54,8 @@ Description: dont_measure obj_type=var_log_t dont_measure obj_type=auditd_log_t - measure subj_user=system_u func=INODE_PERM mask=MAY_READ - measure subj_role=system_r func=INODE_PERM mask=MAY_READ + measure subj_user=system_u func=FILE_CHECK mask=MAY_READ + measure subj_role=system_r func=FILE_CHECK mask=MAY_READ Smack: - measure subj_user=_ func=INODE_PERM mask=MAY_READ + measure subj_user=_ func=FILE_CHECK mask=MAY_READ diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index aa25a7eb2d0e..47fb65d1fcbd 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -130,7 +130,7 @@ void iint_free(struct kref *kref); void iint_rcu_free(struct rcu_head *rcu); /* IMA policy related functions */ -enum ima_hooks { PATH_CHECK = 1, FILE_MMAP, BPRM_CHECK }; +enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK }; int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask); void ima_init_policy(void); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 3cd58b60afd2..2a5e0bcf3887 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -95,12 +95,12 @@ err_out: * ima_must_measure - measure decision based on policy. * @inode: pointer to inode to measure * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE) - * @function: calling function (PATH_CHECK, BPRM_CHECK, FILE_MMAP) + * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP) * * The policy is defined in terms of keypairs: * subj=, obj=, type=, func=, mask=, fsmagic= * subj,obj, and type: are LSM specific. 
- * func: PATH_CHECK | BPRM_CHECK | FILE_MMAP + * func: FILE_CHECK | BPRM_CHECK | FILE_MMAP * mask: contains the permission mask * fsmagic: hex value * diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index b76e1f03ea2b..294b005d6520 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -153,7 +153,7 @@ void ima_counts_get(struct file *file) if (!iint) return; mutex_lock(&iint->mutex); - rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); + rc = ima_must_measure(iint, inode, MAY_READ, FILE_CHECK); if (rc < 0) goto out; @@ -312,7 +312,7 @@ int ima_file_check(struct file *file, int mask) rc = process_measurement(file, file->f_dentry->d_name.name, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - PATH_CHECK); + FILE_CHECK); return 0; } EXPORT_SYMBOL_GPL(ima_file_check); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index e1278399b345..4759d0f99335 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -67,7 +67,7 @@ static struct ima_measure_rule_entry default_rules[] = { .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, - {.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0, + {.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = 0, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, }; @@ -282,8 +282,11 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry) break; case Opt_func: audit_log_format(ab, "func=%s ", args[0].from); - if (strcmp(args[0].from, "PATH_CHECK") == 0) - entry->func = PATH_CHECK; + if (strcmp(args[0].from, "FILE_CHECK") == 0) + entry->func = FILE_CHECK; + /* PATH_CHECK is for backwards compat */ + else if (strcmp(args[0].from, "PATH_CHECK") == 0) + entry->func = FILE_CHECK; else if (strcmp(args[0].from, "FILE_MMAP") == 0) entry->func = FILE_MMAP; else if (strcmp(args[0].from, "BPRM_CHECK") == 0) From 
89068c576bf324ef6fbd50dfc745148f7def202c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Feb 2010 03:07:29 -0500 Subject: [PATCH 308/640] Take ima_file_free() to proper place. Hooks: Just Say No. Signed-off-by: Al Viro --- fs/file_table.c | 1 + security/security.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 69652c5bd5f0..b98404b54383 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -253,6 +253,7 @@ void __fput(struct file *file) if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); + ima_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) cdev_put(inode->i_cdev); fops_put(file->f_op); diff --git a/security/security.c b/security/security.c index 24e060be9fa5..122b748d0f4c 100644 --- a/security/security.c +++ b/security/security.c @@ -666,8 +666,6 @@ int security_file_alloc(struct file *file) void security_file_free(struct file *file) { security_ops->file_free_security(file); - if (file->f_dentry) - ima_file_free(file); } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) From ee11b90b12eb1ec25e1044bac861e90bfd19ec9e Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 7 Feb 2010 11:46:15 -0200 Subject: [PATCH 309/640] perf top: Fix annotate for userspace First, for programs and prelinked libraries, annotate code was fooled by objdump output IPs (src->eip in the code) being wrongly converted to absolute IPs. In such case there were no conversion needed, but in src->eip = strtoull(src->line, NULL, 16); src->eip = map->unmap_ip(map, src->eip); // = eip + map->start - map->pgoff we were reading absolute address from objdump (e.g. 8048604) and then almost doubling it, because eip & map->start are approximately close for small programs. Needless to say, that later, in record_precise_ip() there was no matching with real runtime IPs. 
And second, like with `perf annotate` the problem with non-prelinked *.so was that we were doing rip -> objdump address conversion wrong. Also, because unlike `perf annotate`, `perf top` code does annotation based on absolute IPs for performance reasons(*), a new helper for mapping objdump addresses to IP is introduced. (*) we get samples info in absolute IPs, and since we do lots of hit-testing on absolute IPs at runtime in record_precise_ip(), it's better to convert objdump addresses to IPs once and do no conversion at runtime. I also had to fix how objdump output is parsed (with hardcoded 8/16 characters format, which was inappropriate for ET_DYN dsos with small addresses like '4ac') Also note, that not all objdump output lines have associated IPs, e.g. look at source lines here: 000004ac : extern "C" int my_strlen(const char *s) 4ac: 55 push %ebp 4ad: 89 e5 mov %esp,%ebp 4af: 83 ec 10 sub $0x10,%esp { int len = 0; 4b2: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%ebp) 4b9: eb 08 jmp 4c3 while (*s) { ++len; 4bb: 83 45 fc 01 addl $0x1,-0x4(%ebp) ++s; 4bf: 83 45 08 01 addl $0x1,0x8(%ebp) So we mark them with eip=0, and ignore such lines in annotate lookup code.
Signed-off-by: Kirill Smelkov [ Note: one hunk of this patch was applied by Mike in 57d8188 ] Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1265550376-12665-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 18 +++++++++--------- tools/perf/util/map.c | 8 ++++++++ tools/perf/util/map.h | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e4156bc4566d..befa57e2284d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -216,7 +216,7 @@ static void parse_source(struct sym_entry *syme) while (!feof(file)) { struct source_line *src; size_t dummy = 0; - char *c; + char *c, *sep; src = malloc(sizeof(struct source_line)); assert(src != NULL); @@ -235,14 +235,11 @@ static void parse_source(struct sym_entry *syme) *source->lines_tail = src; source->lines_tail = &src->next; - if (strlen(src->line)>8 && src->line[8] == ':') { - src->eip = strtoull(src->line, NULL, 16); - src->eip = map->unmap_ip(map, src->eip); - } - if (strlen(src->line)>8 && src->line[16] == ':') { - src->eip = strtoull(src->line, NULL, 16); - src->eip = map->unmap_ip(map, src->eip); - } + src->eip = strtoull(src->line, &sep, 16); + if (*sep == ':') + src->eip = map__objdump_2ip(map, src->eip); + else /* this line has no ip info (e.g. 
source line) */ + src->eip = 0; } pclose(file); out_assign: @@ -277,6 +274,9 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) goto out_unlock; for (line = syme->src->lines; line; line = line->next) { + /* skip lines without IP info */ + if (line->eip == 0) + continue; if (line->eip == ip) { line->count[counter]++; break; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index af5805f51314..138e3cb2b727 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -222,3 +222,11 @@ u64 map__rip_2objdump(struct map *map, u64 rip) rip; return addr; } + +u64 map__objdump_2ip(struct map *map, u64 addr) +{ + u64 ip = map->dso->adjust_symbols ? + addr : + map->unmap_ip(map, addr); /* RIP -> IP */ + return ip; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 9cee9c788dbf..86f77cb1d060 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -61,9 +61,9 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) } -/* rip -> addr suitable for passing to `objdump --start-address=` */ +/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); - +u64 map__objdump_2ip(struct map *map, u64 addr); struct symbol; struct mmap_event; From 5f485364365f00853e5249cb3ae31f876936b552 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 7 Feb 2010 11:46:16 -0200 Subject: [PATCH 310/640] perf top: Use address pattern in lookup_sym_source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because we may have aliases, like __GI___strcoll_l in /lib64/libc-2.10.2.so that appears in objdump as: $ objdump --start-address=0x0000003715a86420 \ --stop-address=0x0000003715a872dc -dS /lib64/libc-2.10.2.so 0000003715a86420 <__strcoll_l>: 3715a86420: 55 push %rbp 3715a86421: 48 89 e5 mov %rsp,%rbp 3715a86424: 41 57 push %r15 [root@doppio linux-2.6-tip]# So look for the address exactly at the start of the line 
instead so that annotation can work in these cases. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Kirill Smelkov Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1265550376-12665-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index befa57e2284d..c72ab50d65ca 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -202,10 +202,9 @@ static void parse_source(struct sym_entry *syme) len = sym->end - sym->start; sprintf(command, - "objdump --start-address=0x%016Lx " - "--stop-address=0x%016Lx -dS %s", - map__rip_2objdump(map, sym->start), - map__rip_2objdump(map, sym->end), path); + "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s", + BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start), + BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path); file = popen(command, "r"); if (!file) @@ -292,13 +291,15 @@ static void lookup_sym_source(struct sym_entry *syme) { struct symbol *symbol = sym_entry__symbol(syme); struct source_line *line; - char pattern[PATH_MAX]; + const size_t pattern_len = BITS_PER_LONG / 4 + 2; + char pattern[pattern_len + 1]; - sprintf(pattern, "<%s>:", symbol->name); + sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4, + map__rip_2objdump(syme->map, symbol->start)); pthread_mutex_lock(&syme->src->lock); for (line = syme->src->lines; line; line = line->next) { - if (strstr(line->line, pattern)) { + if (memcmp(line->line, pattern, pattern_len) == 0) { syme->src->source = line; break; } From 076dc4a65a6d99a16979e2c7917e669fb8c91ee5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 5 Feb 2010 12:16:47 -0500 Subject: [PATCH 311/640] x86/alternatives: Fix build warning Fixes these warnings: arch/x86/kernel/alternative.c: In function 'alternatives_text_reserved': 
arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:405: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:405: warning: comparison of distinct pointer types lacks a cast Caused by: 2cfa197: ftrace/alternatives: Introducing *_text_reserved functions Changes in v2: - Use local variables to compare, instead of type casts. Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100205171647.15750.37221.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3c13284ff86d..e63b80e5861c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -395,12 +395,14 @@ int alternatives_text_reserved(void *start, void *end) { struct smp_alt_module *mod; u8 **ptr; + u8 *text_start = start; + u8 *text_end = end; list_for_each_entry(mod, &smp_alt_modules, next) { - if (mod->text > end || mod->text_end < start) + if (mod->text > text_end || mod->text_end < text_start) continue; for (ptr = mod->locks; ptr < mod->locks_end; ptr++) - if (start <= *ptr && end >= *ptr) + if (text_start <= *ptr && text_end >= *ptr) return 1; } From 80e1e823989ec44d8e35bdfddadbddcffec90424 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Feb 2010 10:11:23 -0800 Subject: [PATCH 312/640] Fix race in tty_fasync() properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 703625118069 ("tty: fix race in tty_fasync") and commit b04da8bfdfbb ("fnctl: f_modown should call write_lock_irqsave/ restore") that tried to fix up some of the fallout but was incomplete. 
It turns out that we really cannot hold 'tty->ctrl_lock' over calling __f_setown, because not only did that cause problems with interrupt disables (which the second commit fixed), it also causes a potential ABBA deadlock due to lock ordering. Thanks to Tetsuo Handa for following up on the issue, and running lockdep to show the problem. It goes roughly like this: - f_getown gets filp->f_owner.lock for reading without interrupts disabled, so an interrupt that happens while that lock is held can cause a lockdep chain from f_owner.lock -> sighand->siglock. - at the same time, the tty->ctrl_lock -> f_owner.lock chain that commit 703625118069 introduced, together with the pre-existing sighand->siglock -> tty->ctrl_lock chain means that we have a lock dependency the other way too. So instead of extending tty->ctrl_lock over the whole __f_setown() call, we now just take a reference to the 'pid' structure while holding the lock, and then release it after having done the __f_setown. That still guarantees that 'struct pid' won't go away from under us, which is all we really ever needed. 
Reported-and-tested-by: Tetsuo Handa Acked-by: Greg Kroah-Hartman Acked-by: Américo Wang Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 4 +++- fs/fcntl.c | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index c6f3b48be9dd..dcb9083ecde0 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1951,8 +1951,10 @@ static int tty_fasync(int fd, struct file *filp, int on) pid = task_pid(current); type = PIDTYPE_PID; } - retval = __f_setown(filp, pid, type, 0); + get_pid(pid); spin_unlock_irqrestore(&tty->ctrl_lock, flags); + retval = __f_setown(filp, pid, type, 0); + put_pid(pid); if (retval) goto out; } else { diff --git a/fs/fcntl.c b/fs/fcntl.c index 5ef953e6f908..97e01dc0d95f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -199,9 +199,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - unsigned long flags; - - write_lock_irqsave(&filp->f_owner.lock, flags); + write_lock_irq(&filp->f_owner.lock); if (force || !filp->f_owner.pid) { put_pid(filp->f_owner.pid); filp->f_owner.pid = get_pid(pid); @@ -213,7 +211,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, filp->f_owner.euid = cred->euid; } } - write_unlock_irqrestore(&filp->f_owner.lock, flags); + write_unlock_irq(&filp->f_owner.lock); } int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, From 142698282ceb6811ad3482c218b7292037cb67ff Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 27 Jan 2010 20:05:20 +0000 Subject: [PATCH 313/640] sh: Correct the offset of the return address in ret_from_exception The address that ret_from_exception and ret_from_irq will return to is found in the stack slot for SPC, not PR. 
This error was causing the DWARF unwinder to pick up the wrong return address on the stack and then unwind using the unwind tables for the wrong function. While I'm here I might as well add CFI annotations for the other registers since they could be useful when unwinding. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/kernel/entry-common.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index f0abd58c3a69..2b15ae60c3a0 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -70,8 +70,14 @@ ret_from_exception: CFI_STARTPROC simple CFI_DEF_CFA r14, 0 CFI_REL_OFFSET 17, 64 - CFI_REL_OFFSET 15, 0 + CFI_REL_OFFSET 15, 60 CFI_REL_OFFSET 14, 56 + CFI_REL_OFFSET 13, 52 + CFI_REL_OFFSET 12, 48 + CFI_REL_OFFSET 11, 44 + CFI_REL_OFFSET 10, 40 + CFI_REL_OFFSET 9, 36 + CFI_REL_OFFSET 8, 32 preempt_stop() ENTRY(ret_from_irq) ! From 1dca56f13899b9e256f56198026019835aaf9a3a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 27 Jan 2010 20:44:59 +0000 Subject: [PATCH 314/640] sh: Setup frame pointer in handle_exception path In order to allow the DWARF unwinder to unwind through exceptions we need to setup the frame pointer register (r14). Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh3/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 3f7e2a22c7c2..29021bc67493 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -365,6 +365,8 @@ handle_exception: mov.l @k2, k2 ! read out vector and keep in k2 handle_exception_special: + setup_frame_reg + ! Setup return address and jump to exception handler mov.l 7f, r9 ! fetch return address stc r2_bank, r0 ! 
k2 (vector) From 944a3438615da65f11e2559840404a2cac5f65ea Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 30 Jan 2010 17:36:20 +0000 Subject: [PATCH 315/640] sh: Don't continue unwinding across interrupts Unfortunately, due to poor DWARF info in current toolchains, unwinding through interrupts cannot be done reliably. The problem is that the DWARF info for function epilogues is wrong. Take this standard epilogue sequence, 80003cc4: e3 6f mov r14,r15 80003cc6: 26 4f lds.l @r15+,pr 80003cc8: f6 6e mov.l @r15+,r14 <---- interrupt here 80003cca: f6 6b mov.l @r15+,r11 80003ccc: f6 6a mov.l @r15+,r10 80003cce: f6 69 mov.l @r15+,r9 80003cd0: 0b 00 rts If we take an interrupt at the highlighted point, the DWARF info will bogusly claim that the return address can be found at some offset from the frame pointer, even though the frame pointer was just restored. The worst part is if the unwinder finds a text address at the bogus stack address - unwinding will continue, for a bit, until it finally comes across an unexpected address on the stack and blows up. The only solution is to stop unwinding once we've calculated the function that was executing when the interrupt occurred. This PC can be easily calculated from pt_regs->pc. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/kernel/dwarf.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 88d28ec3780a..e51168064e56 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -540,6 +540,8 @@ void dwarf_free_frame(struct dwarf_frame *frame) mempool_free(frame, dwarf_frame_pool); } +extern void ret_from_irq(void); + /** * dwarf_unwind_stack - unwind the stack * @@ -678,6 +680,24 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc, addr = frame->cfa + reg->addr; frame->return_addr = __raw_readl(addr); + /* + * Ah, the joys of unwinding through interrupts. 
+ * + * Interrupts are tricky - the DWARF info needs to be _really_ + * accurate and unfortunately I'm seeing a lot of bogus DWARF + * info. For example, I've seen interrupts occur in epilogues + * just after the frame pointer (r14) had been restored. The + * problem was that the DWARF info claimed that the CFA could be + * reached by using the value of the frame pointer before it was + * restored. + * + * So until the compiler can be trusted to produce reliable + * DWARF info when it really matters, let's stop unwinding once + * we've calculated the function that was interrupted. + */ + if (prev && prev->pc == (unsigned long)ret_from_irq) + frame->return_addr = 0; + return frame; bail: From 1af0b2fc676009d9b5b71a82ea6a3c2b20b7ea56 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 30 Jan 2010 17:37:25 +0000 Subject: [PATCH 316/640] sh: Remove superfluous setup_frame_reg call There's no need to setup the frame pointer again in call_handle_tlbmiss. The frame pointer will already have been setup in handle_interrupt. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh3/entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 29021bc67493..f6a389c996cb 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -132,7 +132,6 @@ ENTRY(tlb_protection_violation_store) mov #1, r5 call_handle_tlbmiss: - setup_frame_reg mov.l 1f, r0 mov r5, r8 mov.l @r0, r6 From 36350e00696df148507246c817cf6f86329479fd Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Sun, 7 Feb 2010 16:45:12 +0000 Subject: [PATCH 317/640] powerpc/pseries: Fix kexec regression caused by CPPR tracking The code to track the CPPR values added by commit 49bd3647134ea47420067aea8d1401e722bf2aac ("powerpc/pseries: Track previous CPPR values to correctly EOI interrupts") broke kexec on pseries because the kexec code in xics.c calls xics_set_cpu_priority() before the IPI has been EOI'ed. 
This wasn't a problem previously but it now triggers a BUG_ON in xics_set_cpu_priority() because os_cppr->index isn't 0. Fix this problem by setting the index on the CPPR stack to 0 before calling xics_set_cpu_priority() in xics_teardown_cpu(). Also make it clear that we only want to set the priority when there's just one CPPR value in the stack, and enforce it by updating the value of os_cppr->stack[0] rather than os_cppr->stack[os_cppr->index]. While we're at it change the BUG_ON to a WARN_ON. Reported-by: Anton Blanchard Signed-off-by: Mark Nelson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/xics.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 1ee66db003be..f5f79196721c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -784,9 +784,13 @@ static void xics_set_cpu_priority(unsigned char cppr) { struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - BUG_ON(os_cppr->index != 0); + /* + * we only really want to set the priority when there's + * just one cppr value on the stack + */ + WARN_ON(os_cppr->index != 0); - os_cppr->stack[os_cppr->index] = cppr; + os_cppr->stack[0] = cppr; if (firmware_has_feature(FW_FEATURE_LPAR)) lpar_cppr_info(cppr); @@ -821,8 +825,14 @@ void xics_setup_cpu(void) void xics_teardown_cpu(void) { + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); int cpu = smp_processor_id(); + /* + * we have to reset the cppr index to 0 because we're + * not going to return from the IPI + */ + os_cppr->index = 0; xics_set_cpu_priority(0); /* Clear any pending IPI request */ From 1ca137cdcd4e11af03dbe073d48a470b833a456d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Feb 2010 19:02:24 +1000 Subject: [PATCH 318/640] drm/radeon/kms: change Kconfig text to reflect the new option. 
Ingo pointed out that we really don't give the user enough warning to make a decision here. So revise the Kconfig text with a better warning. Acked-by: Ingo Molnar Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Kconfig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 5982321be4d5..1c02d23f6fcc 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -1,10 +1,14 @@ config DRM_RADEON_KMS - bool "Enable modesetting on radeon by default" + bool "Enable modesetting on radeon by default - NEW DRIVER" depends on DRM_RADEON help - Choose this option if you want kernel modesetting enabled by default, - and you have a new enough userspace to support this. Running old - userspaces with this enabled will cause pain. + Choose this option if you want kernel modesetting enabled by default. + + This is a completely new driver. It's only part of the existing drm + for compatibility reasons. It requires an entirely different graphics + stack above it and works very differently from the old drm stack. + i.e. don't enable this unless you know what you are doing it may + cause issues or bugs compared to the previous userspace driver stack. When kernel modesetting is enabled the IOCTL of radeon/drm driver are considered as invalid and an error message is printed From a9f0c381973097462d9688dc26fe66f4f020502e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 7 Feb 2010 23:10:04 -0800 Subject: [PATCH 319/640] Input: psmouse - make sure we don't schedule reconnects after cleanup Set state of the device as "initializing" during and after cleanup to ensure that unsolicited data from the device is not passed on. We especially want to avoid processing new device announcements "0xaa 0x00" that can come up before we perform reconnect operation. 
Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-base.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 9774bdfaa482..d8c0c8d6992c 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1141,7 +1141,14 @@ static void psmouse_cleanup(struct serio *serio) psmouse_deactivate(parent); } - psmouse_deactivate(psmouse); + psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); + + /* + * Disable stream mode so cleanup routine can proceed undisturbed. + */ + if (ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE)) + printk(KERN_WARNING "psmouse.c: Failed to disable mouse on %s\n", + psmouse->ps2dev.serio->phys); if (psmouse->cleanup) psmouse->cleanup(psmouse); From a6013411118a6c8c34f1bd8b047b36fdf9711590 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 1 Feb 2010 12:15:58 +0100 Subject: [PATCH 320/640] microblaze: Invalidate dcache before enabling it We found that on a write-through kernel it is necessary to do that invalidation. On WB it is possible to use invalidation too. Signed-off-by: Michal Simek --- arch/microblaze/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 5372b24ad049..bb8c4b9ccb80 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -54,6 +54,7 @@ void __init setup_arch(char **cmdline_p) microblaze_cache_init(); + invalidate_dcache(); enable_dcache(); invalidate_icache(); From adefdceef4c1cefee2678724cd57824c8ca80091 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 1 Feb 2010 10:35:22 -0300 Subject: [PATCH 321/640] V4L/DVB: Fix the risk of an oops at dvb_dmx_release dvb_dmx_init tries to allocate virtual memory for 2 pointers: filter and feed. If the second vmalloc fails, filter is freed, but the pointer keeps pointing to the old place. 
Later, when dvb_dmx_release() is called, it will try to free an already freed memory, causing an OOPS. Reviewed-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dvb_demux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c index b78cfb7d1897..a78408e76e75 100644 --- a/drivers/media/dvb/dvb-core/dvb_demux.c +++ b/drivers/media/dvb/dvb-core/dvb_demux.c @@ -1246,6 +1246,7 @@ int dvb_dmx_init(struct dvb_demux *dvbdemux) dvbdemux->feed = vmalloc(dvbdemux->feednum * sizeof(struct dvb_demux_feed)); if (!dvbdemux->feed) { vfree(dvbdemux->filter); + dvbdemux->filter = NULL; return -ENOMEM; } for (i = 0; i < dvbdemux->filternum; i++) { From bc081cc8693800ebb118cc2cc6a859dd0b45921b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 1 Feb 2010 11:50:42 -0300 Subject: [PATCH 322/640] V4L/DVB: dvb_demux: Don't use vmalloc at dvb_dmx_swfilter_packet As dvb_dmx_swfilter_packet() is protected by a spinlock, it shouldn't sleep. However, vmalloc() may call sleep. So, move the initialization of dvb_demux::cnt_storage field to a better place. Reviewed-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dvb_demux.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c index a78408e76e75..67f189b7aa1f 100644 --- a/drivers/media/dvb/dvb-core/dvb_demux.c +++ b/drivers/media/dvb/dvb-core/dvb_demux.c @@ -426,16 +426,7 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) }; }; - if (dvb_demux_tscheck) { - if (!demux->cnt_storage) - demux->cnt_storage = vmalloc(MAX_PID + 1); - - if (!demux->cnt_storage) { - printk(KERN_WARNING "Couldn't allocate memory for TS/TEI check. 
Disabling it\n"); - dvb_demux_tscheck = 0; - goto no_dvb_demux_tscheck; - } - + if (demux->cnt_storage) { /* check pkt counter */ if (pid < MAX_PID) { if (buf[1] & 0x80) @@ -454,7 +445,6 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) }; /* end check */ }; -no_dvb_demux_tscheck: list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) @@ -1258,6 +1248,13 @@ int dvb_dmx_init(struct dvb_demux *dvbdemux) dvbdemux->feed[i].index = i; } + if (dvb_demux_tscheck) { + dvbdemux->cnt_storage = vmalloc(MAX_PID + 1); + + if (!dvbdemux->cnt_storage) + printk(KERN_WARNING "Couldn't allocate memory for TS/TEI check. Disabling it\n"); + } + INIT_LIST_HEAD(&dvbdemux->frontend_list); for (i = 0; i < DMX_TS_PES_OTHER; i++) { From 691c9ae099b9bcb5c27125af00a4a90120977458 Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Sun, 7 Feb 2010 09:49:58 -0300 Subject: [PATCH 323/640] V4L/DVB: dvb-core: fix initialization of feeds list in demux filter A DVB demultiplexer device can be used to set up either a PES filter or a section filter. In the former case, the ts field of the feed union of struct dmxdev_filter is used, in the latter case the sec field of the same union is used. The ts field is a struct list_head, and is currently initialized in the open() method of the demux device. When for a given demuxer a section filter is set up, the sec field is played with, thus if a PES filter needs to be set up after that the ts field will be corrupted, causing a kernel oops. This fix moves the list head initialization to dvb_dmxdev_pes_filter_set(), so that the ts field is properly initialized every time a PES filter is set up. 
Signed-off-by: Francesco Lavra Cc: stable Reviewed-by: Andy Walls Tested-by: hermann pitton Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-core/dmxdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c index c37790ad92d0..9ddc57909d49 100644 --- a/drivers/media/dvb/dvb-core/dmxdev.c +++ b/drivers/media/dvb/dvb-core/dmxdev.c @@ -761,7 +761,6 @@ static int dvb_demux_open(struct inode *inode, struct file *file) dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192); dmxdevfilter->type = DMXDEV_TYPE_NONE; dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_ALLOCATED); - INIT_LIST_HEAD(&dmxdevfilter->feed.ts); init_timer(&dmxdevfilter->timer); dvbdev->users++; @@ -887,6 +886,7 @@ static int dvb_dmxdev_pes_filter_set(struct dmxdev *dmxdev, dmxdevfilter->type = DMXDEV_TYPE_PES; memcpy(&dmxdevfilter->params, params, sizeof(struct dmx_pes_filter_params)); + INIT_LIST_HEAD(&dmxdevfilter->feed.ts); dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_SET); From f7e7ee36757f68778700cde1aaed89e1d23e59fd Mon Sep 17 00:00:00 2001 From: "austin_zhang@linux.intel.com" Date: Fri, 5 Feb 2010 09:02:42 -0800 Subject: [PATCH 324/640] perf record: Fix existing process callgraph symbol When using 'perf record -g' on an existing process, even with debuginfo packages, we still cannot get symbols from 'perf report'. try: perf record -g -p `pidof xxx` -f perf report 68.26% :1181 b74870f2 [.] 0x000000b74870f2 | |--32.09%-- 0xb73b5b44 | 0xb7487102 | 0xb748a4e2 | 0xb748633d | 0xb73b41cd | 0xb73b4467 | 0xb747d531 The reason is: for existing process, in __cmd_record(), the pid is 0 rather than the existing process id. 
Signed-off-by: Austin Zhang Acked-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4710.10.255.24.35.1265389362.squirrel@linux.intel.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3ad599b12c91..771533ced6a8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -570,7 +570,7 @@ static int __cmd_record(int argc, const char **argv) } if (!system_wide && profile_cpu == -1) - event__synthesize_thread(pid, process_synthesized_event, + event__synthesize_thread(target_pid, process_synthesized_event, session); else event__synthesize_threads(process_synthesized_event, session); From ccd4bb1beb3316de4611de24d223ad761b5a7e95 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 8 Feb 2010 17:39:58 +0000 Subject: [PATCH 325/640] [CIFS] Don't cache timestamps on utimes due to coarse granularity force revalidate of the file when any of the timestamps are set since some filesystem types do not have finer granularity timestamps and we can not always detect which file systems round timestamps down to determine whether we can cache the mtime on setattr samba bugzilla 3775 Acked-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index cf18ee765590..e3fda978f481 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1762,8 +1762,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) CIFS_MOUNT_MAP_SPECIAL_CHR); } - if (!rc) + if (!rc) { rc = inode_setattr(inode, attrs); + + /* force revalidate when any of these times are set since some + of the fs types (eg ext3, fat) do not have fine enough + time granularity to match protocol, and we do not have a + a way (yet) to query the server fs's time 
granularity (and + whether it rounds times down). + */ + if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))) + cifsInode->time = 0; + } out: kfree(args); kfree(full_path); From 05507fa2ac8d5e503bcf33ee43329449027d9060 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Feb 2010 13:30:36 -0500 Subject: [PATCH 326/640] cifs: fix dentry hash calculation for case-insensitive mounts case-insensitive mounts shouldn't use full_name_hash(). Make sure we use the parent dentry's d_hash routine when one is set. Reported-by: Dave Kleikamp Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/readdir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f5618f8cc462..c343b14ba2d3 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, cFYI(1, ("For %s", name->name)); + if (parent->d_op && parent->d_op->d_hash) + parent->d_op->d_hash(parent, name); + else + name->hash = full_name_hash(name->name, name->len); + dentry = d_lookup(parent, name); if (dentry) { /* FIXME: check for inode number changes? */ @@ -671,8 +676,6 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, pqst->name = filename; pqst->len = len; } - pqst->hash = full_name_hash(pqst->name, pqst->len); -/* cFYI(1, ("filldir on %s",pqst->name)); */ return rc; } From 9edd7ca0a3e3999c260642c92fa008892d82ca6e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:16:26 -0800 Subject: [PATCH 327/640] netfilter: nf_conntrack: fix memory corruption with multiple namespaces As discovered by Jon Masters , the "untracked" conntrack, which is located in the data section, might be accidentally freed when a new namespace is instantiated while the untracked conntrack is attached to a skb because the reference count is re-initialized. 
The best fix would be to use a separate untracked conntrack per namespace since it includes a namespace pointer. Unfortunately this is not possible without larger changes since the namespace is not easily available everywhere we need it. For now move the untracked conntrack initialization to the init_net setup function to make sure the reference count is not re-initialized and handle cleanup in the init_net cleanup function to make sure namespaces can exit properly while the untracked conntrack is in use in other namespaces. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..37e2b88313f2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net) static void nf_conntrack_cleanup_init_net(void) { + /* wait until all references to nf_conntrack_untracked are dropped */ + while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + schedule(); + nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); kmem_cache_destroy(nf_conntrack_cachep); @@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net) schedule(); goto i_see_dead_people; } - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) - schedule(); nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); @@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; + /* Set up fake conntrack: to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a 
confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + return 0; err_helper: @@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net) if (ret < 0) goto err_ecache; - /* Set up fake conntrack: - - to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); - /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return 0; err_ecache: From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: [PATCH 328/640] netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. 
Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 39 ++++++++++++++++++------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd1..aed23b6c8478 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. 
*/ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if 
(!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; From 13ccdfc2af03e09e60791f7d4bc4ccf53398af7c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:22 -0800 Subject: [PATCH 329/640] netfilter: nf_conntrack: restrict runtime expect hashsize modifications Expectation hashtable size was simply glued to a variable with no code to rehash expectations, so it was a bug to allow writing to it. Make "expect_hashsize" readonly. Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index fdf5d2a1d9b4..4ad7d1d809af 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net) #endif /* CONFIG_PROC_FS */ } -module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_init(struct net *net) { From 14c7dbe043d01a83a30633ab6b109ba2ac61d9f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:43 -0800 Subject: [PATCH 330/640] netfilter: xtables: compat out of scope fix As per C99 6.2.4(2) when temporary table data goes out of scope, the behaviour is undefined: if (compat) { struct foo tmp; ... 
private = &tmp; } [dereference private] Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5f..90203e1b9187 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..3ce53cf13d5a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..8a7e0f52e177 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); 
xt_compat_flush_offsets(AF_INET6); private = &tmp; From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: [PATCH 331/640] netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instantiating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + .../netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++---- net/netfilter/nf_conntrack_core.c | 53 ++++++++++--------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 10 files changed, 49 insertions(+), 47 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c8478..63d449807d9b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index
2eb3814d6258..9a4b8b714079 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static 
struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. 
*/ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. 
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. 
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. 
*/ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - 
nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. */ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ 
b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; From 9858ae38011d699d4c2fa7f3493a47accf43a0f5 Mon Sep 17 00:00:00 2001 From: "Kashyap, Desai" Date: Mon, 25 Jan 2010 16:20:52 +0530 Subject: [PATCH 332/640] [SCSI] mptfusion : mptscsih_abort return value should be SUCCESS instead of value 0. retval should be SUCCESS/FAILED which is defined at scsi.h retval = 0 is directing wrong return value. It must be retval = SUCCESS. 
Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/mptscsih.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 57752751712b..81279b3d694c 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1796,7 +1796,7 @@ mptscsih_abort(struct scsi_cmnd * SCpnt) dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "task abort: " "Command not in the active list! (sc=%p)\n", ioc->name, SCpnt)); - retval = 0; + retval = SUCCESS; goto out; } From 7dec9cf1dfa283feca4b761160112ea4838a6a8c Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Tue, 26 Jan 2010 17:49:19 +0100 Subject: [PATCH 333/640] [SCSI] zfcp: Report FC BSG errors in correct field The status FC_CTELS_STATUS_REJECT for all FC BSG errors is not appropriate. Instead, report -EIO in the result field if there was a problem in zfcp with the FC BSG request. If the request is good from our point of view, report result 0, status FC_CTELS_STATUS_OK and let userspace read the Accept or Reject from the payload (as documented in scsi_bsg_fc.h). Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 0f7b493fb105..271399f62f1b 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -671,12 +671,11 @@ static void zfcp_fc_ct_els_job_handler(void *data) { struct fc_bsg_job *job = data; struct zfcp_fsf_ct_els *zfcp_ct_els = job->dd_data; - int status = zfcp_ct_els->status; - int reply_status; + struct fc_bsg_reply *jr = job->reply; - reply_status = status ? 
FC_CTELS_STATUS_REJECT : FC_CTELS_STATUS_OK; - job->reply->reply_data.ctels_reply.status = reply_status; - job->reply->reply_payload_rcv_len = job->reply_payload.payload_len; + jr->reply_payload_rcv_len = job->reply_payload.payload_len; + jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; + jr->result = zfcp_ct_els->status ? -EIO : 0; job->job_done(job); } From 0f19bc681ed0849a2b95778460a0a8132e3700e2 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Fri, 29 Jan 2010 18:09:30 +0800 Subject: [PATCH 334/640] [SCSI] qla2xxx: make msix interrupt handler safe for irq Yinghai has reported a lockdep warning on qla2xxx: [ 77.965784] WARNING: at kernel/lockdep.c:2332 trace_hardirqs_on_caller+0xc6/0x14b() [ 77.977492] Hardware name: Sun [ 77.979485] Modules linked in: [ 77.994337] Pid: 0, comm: swapper Not tainted 2.6.33-rc4-tip-yh-03949-g3a8e3f5-dirty #64 [ 78.000120] Call Trace: [ 78.013298] [] warn_slowpath_common+0x7c/0x94 [ 78.017746] [] ? _raw_spin_unlock_irq+0x30/0x36 [ 78.035171] [] warn_slowpath_null+0x14/0x16 [ 78.040152] [] trace_hardirqs_on_caller+0xc6/0x14b [ 78.055400] [] trace_hardirqs_on+0xd/0xf [ 78.058951] [] _raw_spin_unlock_irq+0x30/0x36 [ 78.074889] [] qla24xx_msix_default+0x243/0x281 [ 78.091598] [] ? __lock_release+0xa5/0xae [ 78.096799] [] handle_IRQ_event+0x53/0x113 [ 78.111568] [] handle_edge_irq+0xf3/0x13b [ 78.116255] [] handle_irq+0x24/0x2f [ 78.132063] [] do_IRQ+0x5c/0xc3 [ 78.134684] [] ret_from_intr+0x0/0xf [ 78.137903] [] ? mwait_idle+0xaf/0xbb [ 78.155674] [] ? mwait_idle+0xa6/0xbb [ 78.158600] [] cpu_idle+0x61/0xa1 [ 78.174333] [] rest_init+0x7e/0x80 [ 78.178122] [] start_kernel+0x316/0x31d [ 78.193623] [] x86_64_start_reservations+0xa7/0xab [ 78.198924] [] x86_64_start_kernel+0xe4/0xeb [ 78.214540] ---[ end trace be4529f30a2e4ef5 ]--- This happened when qla2xxx msix interrupt handler is trying to enable IRQs by spin_unlock_irq().
We should make interrupt handler safe for IRQs, use spin_lock_irqsave/spin_unlock_irqrestore, this will not break the IRQs status in interrupt handler. Reported-by: Yinghai Lu Signed-off-by: Xiaotian Feng Acked-by: Giridhar Malavali Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_isr.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index ffd0efdff40e..0ced91c5ebd3 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1917,6 +1917,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id) struct rsp_que *rsp; struct device_reg_24xx __iomem *reg; struct scsi_qla_host *vha; + unsigned long flags; rsp = (struct rsp_que *) dev_id; if (!rsp) { @@ -1927,7 +1928,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id) ha = rsp->hw; reg = &ha->iobase->isp24; - spin_lock_irq(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); vha = qla25xx_get_host(rsp); qla24xx_process_response_queue(vha, rsp); @@ -1935,7 +1936,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id) WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT); RD_REG_DWORD_RELAXED(&reg->hccr); } - spin_unlock_irq(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); return IRQ_HANDLED; } @@ -1946,6 +1947,7 @@ qla25xx_msix_rsp_q(int irq, void *dev_id) struct qla_hw_data *ha; struct rsp_que *rsp; struct device_reg_24xx __iomem *reg; + unsigned long flags; rsp = (struct rsp_que *) dev_id; if (!rsp) { @@ -1958,10 +1960,10 @@ qla25xx_msix_rsp_q(int irq, void *dev_id) /* Clear the interrupt, if enabled, for this response queue */ if (rsp->options & ~BIT_6) { reg = &ha->iobase->isp24; - spin_lock_irq(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT); RD_REG_DWORD_RELAXED(&reg->hccr); - spin_unlock_irq(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } queue_work_on((int) (rsp->id - 1), ha->wq,
&rsp->q_work); @@ -1979,6 +1981,7 @@ qla24xx_msix_default(int irq, void *dev_id) uint32_t stat; uint32_t hccr; uint16_t mb[4]; + unsigned long flags; rsp = (struct rsp_que *) dev_id; if (!rsp) { @@ -1990,7 +1993,7 @@ qla24xx_msix_default(int irq, void *dev_id) reg = &ha->iobase->isp24; status = 0; - spin_lock_irq(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); vha = pci_get_drvdata(ha->pdev); do { stat = RD_REG_DWORD(&reg->host_status); @@ -2039,7 +2042,7 @@ qla24xx_msix_default(int irq, void *dev_id) } WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT); } while (0); - spin_unlock_irq(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags) && (status & MBX_INTERRUPT) && ha->flags.mbox_int) { From 84eb8fb42c120ff32b201c1cdd910033c888f699 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 5 Jan 2010 19:41:44 +0900 Subject: [PATCH 335/640] [SCSI] compat_ioct: fix bsg SG_IO bsg's SG_IO doesn't work on 32-bit userspace and 64-bit kernelspace. The problem is that both sg and bsg drivers use SG_IO ioctl. sg_ioctl_trans() does 32/64-bit conversion even against bsg header. It messes up bsg header. bsg driver gets garbage. This patch fixes sg_ioctl_trans to handle only sg header (struct sg_io_hdr).
Reported-by: Giridhar Malavali Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- fs/compat_ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c5c45de1a2ee..7cbbc7ab4b50 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -301,6 +301,12 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, u32 data; void __user *dxferp; int err; + int interface_id; + + if (get_user(interface_id, &sgio32->interface_id)) + return -EFAULT; + if (interface_id != 'S') + return sys_ioctl(fd, cmd, (unsigned long)sgio32); if (get_user(iovec_count, &sgio32->iovec_count)) return -EFAULT; From a67093d46e3caed1a42d694a7de452b61db30562 Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Thu, 4 Feb 2010 14:17:59 -0800 Subject: [PATCH 336/640] [SCSI] qla2xxx: Obtain proper host structure during response-queue processing. Original code incorrectly assumed only status-type-0 IOCBs would be queued to the response-queue, and thus all entries would safely reference a VHA from the IOCB 'handle.' 
Cc: stable@kernel.org Signed-off-by: Giridhar Malavali Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_gbl.h | 1 - drivers/scsi/qla2xxx/qla_isr.c | 29 +---------------------------- drivers/scsi/qla2xxx/qla_mid.c | 8 +++++--- 3 files changed, 6 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index f61fb8d01330..8bc6f53691e9 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -453,6 +453,5 @@ extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t); extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t); extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t); extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t); -extern struct scsi_qla_host * qla25xx_get_host(struct rsp_que *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 0ced91c5ebd3..6fc63b98818c 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1930,7 +1930,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id) spin_lock_irqsave(&ha->hardware_lock, flags); - vha = qla25xx_get_host(rsp); + vha = pci_get_drvdata(ha->pdev); qla24xx_process_response_queue(vha, rsp); if (!ha->flags.disable_msix_handshake) { WRT_REG_DWORD(®->hccr, HCCRX_CLR_RISC_INT); @@ -2280,30 +2280,3 @@ int qla25xx_request_irq(struct rsp_que *rsp) msix->rsp = rsp; return ret; } - -struct scsi_qla_host * -qla25xx_get_host(struct rsp_que *rsp) -{ - srb_t *sp; - struct qla_hw_data *ha = rsp->hw; - struct scsi_qla_host *vha = NULL; - struct sts_entry_24xx *pkt; - struct req_que *req; - uint16_t que; - uint32_t handle; - - pkt = (struct sts_entry_24xx *) rsp->ring_ptr; - que = MSW(pkt->handle); - handle = (uint32_t) LSW(pkt->handle); - req = ha->req_q_map[que]; - if (handle < MAX_OUTSTANDING_COMMANDS) { - sp = req->outstanding_cmds[handle]; - if (sp) - return sp->fcport->vha; - else - goto 
base_que; - } -base_que: - vha = pci_get_drvdata(ha->pdev); - return vha; -} diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index b901aa267e7d..ff17dee28613 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -636,13 +636,15 @@ failed: static void qla_do_work(struct work_struct *work) { + unsigned long flags; struct rsp_que *rsp = container_of(work, struct rsp_que, q_work); struct scsi_qla_host *vha; + struct qla_hw_data *ha = rsp->hw; - spin_lock_irq(&rsp->hw->hardware_lock); - vha = qla25xx_get_host(rsp); + spin_lock_irqsave(&rsp->hw->hardware_lock, flags); + vha = pci_get_drvdata(ha->pdev); qla24xx_process_response_queue(vha, rsp); - spin_unlock_irq(&rsp->hw->hardware_lock); + spin_unlock_irqrestore(&rsp->hw->hardware_lock, flags); } /* create response queue */ From 562ada612058133a5483c68a73605f3c5f42fffe Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 18:54:03 -0600 Subject: [PATCH 337/640] net/9p: fix virtio transport to correctly update status on connect The 9p virtio transport was not updating its connection status correctly preventing it from being able to mount the server. Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index ea1e3daabefe..67c4bc704c5a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -311,6 +311,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) } client->trans = (void *)chan; + client->status = Connected; chan->client = client; return 0; From 349d3bb878d71978650a0634b5445af3c1cc1cd8 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:10 -0600 Subject: [PATCH 338/640] net/9p: fail when user specifies a transport which we can't find If the user specifies a transport and we can't find it, we fell back to the default transport silently.
This patch will make the code complain more loudly and return an error code. Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/9p/client.c b/net/9p/client.c index 8af95b2dddd6..90a2eb926d19 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -108,6 +108,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) break; case Opt_trans: clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); + if(clnt->trans_mod == NULL) { + P9_DPRINTK(P9_DEBUG_ERROR, + "Could not find request transport: %s\n", + (char *) &args[0]); + ret = -EINVAL; + goto free_and_return; + } break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +124,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } } +free_and_return: kfree(options); return ret; } From 9d6939dac77102b09396ee0b89392ec7639612a7 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Fri, 15 Jan 2010 19:01:56 -0600 Subject: [PATCH 339/640] net/9p: fix statsize inside twstat stat structures contain a size prefix. In our twstat messages we were including the size of the size prefix in the prefix, which is not what the protocol wants, and Inferno servers would complain. 
Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index 90a2eb926d19..a2e2d61b903b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1222,10 +1222,11 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) { int ret; + /* NOTE: size shouldn't include its own length */ /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ - ret = 2+2+4+13+4+4+4+8+2+2+2+2; + ret = 2+4+13+4+4+4+8+2+2+2+2; if (wst->name) ret += strlen(wst->name); @@ -1266,7 +1267,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; From 260c64d23532caf19abb77e696971da05c388489 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Feb 2010 13:42:26 -0500 Subject: [PATCH 340/640] Revert "nfsd4: fix error return when pseudoroot missing" Commit f39bde24b275ddc45d fixed the error return from PUTROOTFH in the case where there is no pseudofilesystem. This is really a case we shouldn't hit on a correctly configured server: in the absence of a root filehandle, there's no point accepting version 4 NFS rpc calls at all. But the shared responsibility between kernel and userspace here means the kernel on its own can't eliminate the possibility of this happening. And we have indeed gotten this wrong in distros, so new client-side mount code that attempts to negotiate v4 by default first has to work around this case.
Therefore when commit f39bde24b275ddc45d arrived at roughly the same time as the new v4-default mount code, which explicitly checked only for the previous error, the result was previously fine mounts suddenly failing. We'll fix both sides for now: revert the error change, and make the client-side mount workaround more robust. Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c487810a2366..a0c4016413f1 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1316,19 +1316,11 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp) { - struct svc_export *exp; u32 fsidv[2]; mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); - /* - * We shouldn't have accepting an nfsv4 request at all if we - * don't have a pseudoexport!: - */ - if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT) - exp = ERR_PTR(-ESERVERFAULT); - return exp; + return rqst_exp_find(rqstp, FSID_NUM, fsidv); } /* From 86a06abab0ffbb9d8ce2b7f6b6652412ce2d2c36 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 5 Feb 2010 17:55:56 -0800 Subject: [PATCH 341/640] ocfs2/dlm: Fix printing of lockname The debug call printing the name of the lock resource was chopping off the last character. This patch fixes the problem. 
Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmdebug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 42b0bad7a612..0cd24cf54396 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) assert_spin_locked(&res->spinlock); stringify_lockname(res->lockname.name, res->lockname.len, - buf, sizeof(buf) - 1); + buf, sizeof(buf)); printk("lockres: %s, owner=%u, state=%u\n", buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", From 6efd806634f7526f723f3aa7ceffd3887a932d9c Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 5 Feb 2010 15:41:23 -0800 Subject: [PATCH 342/640] ocfs2/cluster: Make o2net connect messages KERN_NOTICE Connect and disconnect messages are more than informational as they are required during root cause analysis for failures. This patch changes them from KERN_INFO to KERN_NOTICE. 
Signed-off-by: Sunil Mushran Acked-by: Mark Faseh Signed-off-by: Joel Becker --- fs/ocfs2/cluster/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 938ba181a3d9..d8d0c65ac03c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_INFO "o2net: no longer connected to " + printk(KERN_NOTICE "o2net: no longer connected to " SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, if (!was_valid && valid) { o2quo_conn_up(o2net_num_from_nn(nn)); cancel_delayed_work(&nn->nn_connect_expired); - printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", + printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", o2nm_this_node() > sc->sc_node->nd_num ? "connected to" : "accepted connection from", SC_NODEF_ARGS(sc)); @@ -1476,7 +1476,7 @@ static void o2net_idle_timer(unsigned long data) do_gettimeofday(&now); - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " + printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); From 0da780c269957783d341fc3559e6b4c9912af7b4 Mon Sep 17 00:00:00 2001 From: Benoit Papillault Date: Fri, 5 Feb 2010 01:21:03 +0100 Subject: [PATCH 343/640] mac80211: Fix probe request filtering in IBSS mode We only reply to probe request if either the requested SSID is the broadcast SSID or if the requested SSID matches our own SSID. This latter case was not properly handled since we were replying to different SSID with the same length as our own SSID. Signed-off-by: Benoit Papillault Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1f2db647bb5c..22f0c2aa7a89 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -647,7 +647,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, } if (pos[1] != 0 && (pos[1] != ifibss->ssid_len || - !memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { + memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) { /* Ignore ProbeReq for foreign SSID */ return; } From 098dfded5b1b09927995e89c6d689f85a0f53384 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Fri, 5 Feb 2010 11:40:00 -0800 Subject: [PATCH 344/640] iwlwifi: Fix to set correct ht configuration iwl_set_rxon_ht() only gets called in iwl_post_associate(), which can cause an incorrect ht configuration. Add the call in iwl_mac_config() if the IEEE80211_CONF_CHANGE_CHANNEL flag is set, to re-configure and send the rxon command. Fixes http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2146 Signed-off-by: Wey-Yi Guy Signed-off-by: Reinette Chatre CC: stable@kernel.org Signed-off-by: John W.
Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 5461f105bd2d..d10bea64fce3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2744,6 +2744,7 @@ int iwl_mac_config(struct ieee80211_hw *hw, u32 changed) if ((le16_to_cpu(priv->staging_rxon.channel) != ch)) priv->staging_rxon.flags = 0; + iwl_set_rxon_ht(priv, ht_conf); iwl_set_rxon_channel(priv, conf->channel); iwl_set_flags_for_band(priv, conf->channel->band); From c0ce77b8323c1a0d4eeef97caf16c0ea971222a9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 10:22:31 +0100 Subject: [PATCH 345/640] mac80211: fix deferred hardware scan requests Reinette found the reason for the warnings that happened occasionally when a hw-offloaded scan finished; her description of the problem: mac80211 will defer the handling of scan requests if it is busy with management work at the time. The scan requests are deferred and run after the work has completed. When this occurs there are currently two problems. * The scan request for hardware scan is not fully populated with the band and channels to scan not initialized. * When the scan is queued the state is not correctly updated to reflect that a scan is in progress. The problem here is that when the driver completes the scan and calls ieee80211_scan_completed() a warning will be triggered since mac80211 was not aware that a scan was in progress. The reason is that the queued scan work will start the hw scan right away when the hw_scan_req struct has already been allocated. However, in the first pass it will not have been filled, which happens at the same time as setting the bits. To fix this, simply move the allocation after the pending work test as well, so that the first iteration of the scan work will call __ieee80211_start_scan() even in the hardware scan case. 
Bug-identified-by: Reinette Chatre Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/scan.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f934c9620b73..bc17cf7d68db 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -439,6 +439,16 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) return -EBUSY; + if (req != local->int_scan_req && + sdata->vif.type == NL80211_IFTYPE_STATION && + !list_empty(&ifmgd->work_list)) { + /* actually wait for the work it's doing to finish/time out */ + set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); + local->scan_req = req; + local->scan_sdata = sdata; + return 0; + } + if (local->ops->hw_scan) { u8 *ies; @@ -463,14 +473,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->scan_req = req; local->scan_sdata = sdata; - if (req != local->int_scan_req && - sdata->vif.type == NL80211_IFTYPE_STATION && - !list_empty(&ifmgd->work_list)) { - /* actually wait for the work it's doing to finish/time out */ - set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); - return 0; - } - if (local->ops->hw_scan) __set_bit(SCAN_HW_SCANNING, &local->scanning); else From 7a4439c406c21b1e900ed497cec1a79d05b38c07 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 8 Feb 2010 15:36:48 -0600 Subject: [PATCH 346/640] 9p: Include fsync support for 9p client Implement the fsync in the client side by marking stat field values to 'don't touch' so that server may interpret it as a request to guarantee that the contents of the associated file are committed to stable storage before the Rwstat message is returned. Without this patch, calling fsync on a 9p file results in "Invalid argument" error. Please check the attached C program. Signed-off-by: Aneesh Kumar K.V Signed-off-by: M. 
Mohan Kumar Acked-by: Venkateswararao Jujjuri (JV) Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 1 + fs/9p/vfs_file.c | 19 +++++++++++++++++++ fs/9p/vfs_inode.c | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 3a7560e35865..ed835836e0dc 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *); int v9fs_uflags2omode(int uflags, int extended); ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); +void v9fs_blank_wstat(struct p9_wstat *wstat); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3902bf43a088..74a0461a9ac0 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } +static int v9fs_file_fsync(struct file *filp, struct dentry *dentry, + int datasync) +{ + struct p9_fid *fid; + struct p9_wstat wstat; + int retval; + + P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp, + dentry, datasync); + + fid = filp->private_data; + v9fs_blank_wstat(&wstat); + + retval = p9_client_wstat(fid, &wstat); + return retval; +} + static const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, + .fsync = v9fs_file_fsync, }; const struct file_operations v9fs_file_operations = { @@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, + .fsync = v9fs_file_fsync, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 9d03d1ebca6f..a407fa3388c0 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended) * */ -static void +void 
v9fs_blank_wstat(struct p9_wstat *wstat) { wstat->type = ~0; From d8c8a9e36560e9ff4c99279d64ce5dd0e1a33fa6 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 16:23:23 -0600 Subject: [PATCH 347/640] 9p: fix option parsing Options pointer is being moved before calling kfree() which seems to cause problems. This uses a separate pointer to track and free original allocation. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen w --- fs/9p/v9fs.c | 10 ++++++---- net/9p/client.c | 9 +++++---- net/9p/trans_fd.c | 10 ++++++---- net/9p/trans_rdma.c | 9 +++++---- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index cf62b05e296a..6848788a13db 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -84,7 +84,7 @@ static const match_table_t tokens = { static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) { - char *options; + char *options, *tmp_options; substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; @@ -102,9 +102,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) if (!opts) return 0; - options = kstrdup(opts, GFP_KERNEL); - if (!options) + tmp_options = kstrdup(opts, GFP_KERNEL); + if (!tmp_options) goto fail_option_alloc; + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -194,7 +195,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) continue; } } - kfree(options); + + kfree(tmp_options); return ret; fail_option_alloc: diff --git a/net/9p/client.c b/net/9p/client.c index a2e2d61b903b..cbe066966b3c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -69,7 +69,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); static int parse_opts(char *opts, struct p9_client *clnt) { - char *options; + char *options, *tmp_options; char *p; substring_t args[MAX_OPT_ARGS]; int option; @@ -81,12 +81,13 @@ static int parse_opts(char *opts, struct p9_client *clnt) if 
(!opts) return 0; - options = kstrdup(opts, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(opts, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -125,7 +126,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } free_and_return: - kfree(options); + kfree(tmp_options); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index be1cb909d8c0..31d0b05582a9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -714,7 +714,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -724,12 +724,13 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -760,7 +761,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) continue; } } - kfree(options); + + kfree(tmp_options); return 0; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 65cb29db03f8..2c95a89c0f46 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -166,7 +166,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *options; + char *options, *tmp_options; int ret; opts->port = P9_PORT; @@ -177,12 +177,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!params) return 0; - options = kstrdup(params, GFP_KERNEL); - if (!options) { + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { P9_DPRINTK(P9_DEBUG_ERROR, 
"failed to allocate copy of option string\n"); return -ENOMEM; } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -216,7 +217,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) } /* RQ must be at least as large as the SQ */ opts->rq_depth = max(opts->rq_depth, opts->sq_depth); - kfree(options); + kfree(tmp_options); return 0; } From 4b9d2a2112163a757943c78ea98587fc9e828641 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 8 Feb 2010 13:16:55 +1000 Subject: [PATCH 348/640] drm/radeon/kms: don't crash if no DDC bus on VGA/DVI connector. This is strange - like really really strange, twilight zone of strange. VGA ports have DDC buses, but sometimes for some reasons the BIOS says we don't and we oops - AMD mentioned bios bugs so we'll have to add quirks. reported on irc by nirbheek and https://bugzilla.redhat.com/show_bug.cgi?id=554323 Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 20 ++++++++++++-------- drivers/gpu/drm/radeon/radeon_display.c | 11 ++++++++++- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 2d8e5a70f284..238188540017 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -580,16 +580,18 @@ static enum drm_connector_status radeon_vga_detect(struct drm_connector *connect struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; - bool dret; + bool dret = false; enum drm_connector_status ret = connector_status_disconnected; encoder = radeon_best_single_encoder(connector); if (!encoder) ret = connector_status_disconnected; - radeon_i2c_do_lock(radeon_connector->ddc_bus, 1); - dret = radeon_ddc_probe(radeon_connector); - radeon_i2c_do_lock(radeon_connector->ddc_bus, 0); + if (radeon_connector->ddc_bus) { + 
radeon_i2c_do_lock(radeon_connector->ddc_bus, 1); + dret = radeon_ddc_probe(radeon_connector); + radeon_i2c_do_lock(radeon_connector->ddc_bus, 0); + } if (dret) { if (radeon_connector->edid) { kfree(radeon_connector->edid); @@ -740,11 +742,13 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect struct drm_mode_object *obj; int i; enum drm_connector_status ret = connector_status_disconnected; - bool dret; + bool dret = false; - radeon_i2c_do_lock(radeon_connector->ddc_bus, 1); - dret = radeon_ddc_probe(radeon_connector); - radeon_i2c_do_lock(radeon_connector->ddc_bus, 0); + if (radeon_connector->ddc_bus) { + radeon_i2c_do_lock(radeon_connector->ddc_bus, 1); + dret = radeon_ddc_probe(radeon_connector); + radeon_i2c_do_lock(radeon_connector->ddc_bus, 0); + } if (dret) { if (radeon_connector->edid) { kfree(radeon_connector->edid); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 6a92f994cc26..7e17a362b54b 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -278,7 +278,7 @@ static void radeon_print_display_setup(struct drm_device *dev) DRM_INFO(" %s\n", connector_names[connector->connector_type]); if (radeon_connector->hpd.hpd != RADEON_HPD_NONE) DRM_INFO(" %s\n", hpd_names[radeon_connector->hpd.hpd]); - if (radeon_connector->ddc_bus) + if (radeon_connector->ddc_bus) { DRM_INFO(" DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", radeon_connector->ddc_bus->rec.mask_clk_reg, radeon_connector->ddc_bus->rec.mask_data_reg, @@ -288,6 +288,15 @@ static void radeon_print_display_setup(struct drm_device *dev) radeon_connector->ddc_bus->rec.en_data_reg, radeon_connector->ddc_bus->rec.y_clk_reg, radeon_connector->ddc_bus->rec.y_data_reg); + } else { + if (connector->connector_type == DRM_MODE_CONNECTOR_VGA || + connector->connector_type == DRM_MODE_CONNECTOR_DVII || + connector->connector_type == DRM_MODE_CONNECTOR_DVID || + connector->connector_type 
== DRM_MODE_CONNECTOR_DVIA || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) + DRM_INFO(" DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n"); + } DRM_INFO(" Encoders:\n"); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { radeon_encoder = to_radeon_encoder(encoder); From 2fc1b5dd99f66d93ffc23fd8df82d384c1a354c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 15:00:39 -0800 Subject: [PATCH 349/640] dst: call cond_resched() in dst_gc_task() Kernel bugzilla #15239 On some workloads, it is quite possible to get a huge dst list to process in dst_gc_task(), and trigger soft lockup detection. Fix is to call cond_resched(), as we run in process context. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dst.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dst.c b/net/core/dst.c index 57bc4d5b8d08..cb1b3488b739 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -79,6 +80,7 @@ loop: while ((dst = next) != NULL) { next = dst->next; prefetch(&next->next); + cond_resched(); if (likely(atomic_read(&dst->__refcnt))) { last->next = dst; last = dst; From efa8450f6c93c9d4c99adfea2f52f1d02d878d5b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 9 Feb 2010 09:06:00 +1000 Subject: [PATCH 350/640] drm/radeon/kms: add quirk for VGA without DDC on rv730 XFX card. Reported on irc by nirbheek. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_atombios.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index fa82ca74324e..2dcda6115874 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -287,6 +287,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, *connector_type = DRM_MODE_CONNECTOR_DVID; } + /* XFX Pine Group device rv730 reports no VGA DDC lines + * even though they are wired up to record 0x93 + */ + if ((dev->pdev->device == 0x9498) && + (dev->pdev->subsystem_vendor == 0x1682) && + (dev->pdev->subsystem_device == 0x2452)) { + struct radeon_device *rdev = dev->dev_private; + *i2c_bus = radeon_lookup_i2c_gpio(rdev, 0x93); + } return true; } From fb786100f7c75e154e63d0f5a2982e6d46dfb602 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 8 Feb 2010 11:50:32 +0000 Subject: [PATCH 351/640] 9p: Fix the kernel crash on a failed mount This patch fixes the crash reported below [ 15.149907] BUG: unable to handle kernel NULL pointer dereference at 00000001 [ 15.150806] IP: [] p9_virtio_close+0x18/0x24 ..... .... [ 15.150806] Call Trace: [ 15.150806] [] ? p9_client_destroy+0x3f/0x163 [ 15.150806] [] ? p9_client_create+0x25f/0x270 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? match_token+0x64/0x164 [ 15.150806] [] ? v9fs_session_init+0x2f1/0x3c8 [ 15.150806] [] ? kmem_cache_alloc+0x98/0xb8 [ 15.150806] [] ? trace_hardirqs_on+0xb/0xd [ 15.150806] [] ? v9fs_get_sb+0x47/0x1e8 [ 15.150806] [] ? v9fs_get_sb+0x60/0x1e8 [ 15.150806] [] ? vfs_kern_mount+0x81/0x11a [ 15.150806] [] ? do_kern_mount+0x33/0xbe [ 15.150806] [] ? do_mount+0x654/0x6b3 [ 15.150806] [] ? do_page_fault+0x0/0x284 [ 15.150806] [] ? copy_mount_options+0x73/0xd2 [ 15.150806] [] ? sys_mount+0x61/0x94 [ 15.150806] [] ? syscall_call+0x7/0xb ....
[ 15.203562] ---[ end trace 1dd159357709eb4b ]--- [ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 67c4bc704c5a..cb50f4ae5eef 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -102,7 +102,8 @@ static void p9_virtio_close(struct p9_client *client) struct virtio_chan *chan = client->trans; mutex_lock(&virtio_9p_lock); - chan->inuse = false; + if (chan) + chan->inuse = false; mutex_unlock(&virtio_9p_lock); } From bf2d29c64dd777e9a40bc4533e721944a590250f Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 17:59:34 -0600 Subject: [PATCH 352/640] 9p: fix memory leak in v9fs_parse_options() If match_strdup() fails, this function exits without freeing the options string. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6848788a13db..7d6c2139891d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -103,8 +103,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) return 0; tmp_options = kstrdup(opts, GFP_KERNEL); - if (!tmp_options) + if (!tmp_options) { + ret = -ENOMEM; goto fail_option_alloc; + } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { @@ -160,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) break; case Opt_cache: s = match_strdup(&args[0]); - if (!s) - goto fail_option_alloc; + if (!s) { + ret = -ENOMEM; + P9_DPRINTK(P9_DEBUG_ERROR, + "problem allocating copy of cache arg\n"); + goto free_and_return; + } if (strcmp(s, "loose") == 0) v9ses->cache = CACHE_LOOSE; @@ -174,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_access: s = match_strdup(&args[0]); - if (!s) - goto
fail_option_alloc; + if (!s) { + ret = -ENOMEM; + P9_DPRINTK(P9_DEBUG_ERROR, + "problem allocating copy of access arg\n"); + goto free_and_return; + } v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) @@ -196,13 +206,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) } } +free_and_return: kfree(tmp_options); - return ret; - fail_option_alloc: - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option argument\n"); - return -ENOMEM; + return ret; } /** From 8781ff9495578dbb74065fae55305110d9f81cb9 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 8 Feb 2010 18:18:34 -0600 Subject: [PATCH 353/640] 9p: fix p9_client_destroy unconditional calling v9fs_put_trans restructure client create code to handle error cases better and only cleanup initialized portions of the stack. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index cbe066966b3c..09d4f1e2e4a8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -676,18 +676,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - clnt->fidpool = p9_idpool_create(); - if (IS_ERR(clnt->fidpool)) { - err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; - goto error; - } p9_tag_init(clnt); err = parse_opts(options, clnt); if (err < 0) - goto error; + goto free_client; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -696,7 +690,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto error; + goto free_client; + } + + clnt->fidpool = p9_idpool_create(); + if (IS_ERR(clnt->fidpool)) { + err = PTR_ERR(clnt->fidpool); + clnt->fidpool = NULL; + goto 
put_trans; } P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", @@ -704,19 +705,25 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = clnt->trans_mod->create(clnt, dev_name, options); if (err) - goto error; + goto destroy_fidpool; if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; err = p9_client_version(clnt); if (err) - goto error; + goto close_trans; return clnt; -error: - p9_client_destroy(clnt); +close_trans: + clnt->trans_mod->close(clnt); +destroy_fidpool: + p9_idpool_destroy(clnt->fidpool); +put_trans: + v9fs_put_trans(clnt->trans_mod); +free_client: + kfree(clnt); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_create); From f927b8907cb25943d6275d4ea036c065b8fd3f33 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 27 Jan 2010 14:29:05 +1000 Subject: [PATCH 354/640] drm/nouveau: fix non-vram notifier blocks Due to a thinko, these were previously forced to VRAM even if we allocated them in GART. This commit fixes that bug, but keeps the previous behaviour of using VRAM by default until it's been tested properly across more chipsets. 
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.c | 2 +- drivers/gpu/drm/nouveau/nouveau_notifier.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 343ab7f17ccc..fc692e5553ad 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -56,7 +56,7 @@ int nouveau_vram_pushbuf; module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400); MODULE_PARM_DESC(vram_notify, "Force DMA notifiers to be in VRAM"); -int nouveau_vram_notify; +int nouveau_vram_notify = 1; module_param_named(vram_notify, nouveau_vram_notify, int, 0400); MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (>=GeForce 8)"); diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c index 6c66a34b6345..d99dc087f9b1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_notifier.c +++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c @@ -34,15 +34,20 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan) { struct drm_device *dev = chan->dev; struct nouveau_bo *ntfy = NULL; + uint32_t flags; int ret; - ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, nouveau_vram_notify ? 
- TTM_PL_FLAG_VRAM : TTM_PL_FLAG_TT, + if (nouveau_vram_notify) + flags = TTM_PL_FLAG_VRAM; + else + flags = TTM_PL_FLAG_TT; + + ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags, 0, 0x0000, false, true, &ntfy); if (ret) return ret; - ret = nouveau_bo_pin(ntfy, TTM_PL_FLAG_VRAM); + ret = nouveau_bo_pin(ntfy, flags); if (ret) goto out_err; @@ -128,6 +133,8 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, target = NV_DMA_TARGET_PCI; } else { target = NV_DMA_TARGET_AGP; + if (dev_priv->card_type >= NV_50) + offset += dev_priv->vm_gart_base; } } else { NV_ERROR(dev, "Bad DMA target, mem_type %d!\n", From a32ed69d7bb3cd259d813d71281d62993b9a70fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Tue, 26 Jan 2010 14:00:42 +0000 Subject: [PATCH 355/640] drm/nouveau: Add module options to disable acceleration. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit noaccel=1 disables all acceleration and doesn't even attempt initialising PGRAPH+PFIFO, nofbaccel=1 only makes fbcon unaccelerated. 
Signed-off-by: Marcin Kościelnicki Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.c | 8 +++++++ drivers/gpu/drm/nouveau/nouveau_drv.h | 2 ++ drivers/gpu/drm/nouveau/nouveau_fbcon.c | 10 +++++--- drivers/gpu/drm/nouveau/nouveau_state.c | 32 +++++++++++++++---------- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index fc692e5553ad..da3b93b84502 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -75,6 +75,14 @@ MODULE_PARM_DESC(ignorelid, "Ignore ACPI lid status"); int nouveau_ignorelid = 0; module_param_named(ignorelid, nouveau_ignorelid, int, 0400); +MODULE_PARM_DESC(noagp, "Disable all acceleration"); +int nouveau_noaccel = 0; +module_param_named(noaccel, nouveau_noaccel, int, 0400); + +MODULE_PARM_DESC(noagp, "Disable fbcon acceleration"); +int nouveau_nofbaccel = 0; +module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400); + MODULE_PARM_DESC(tv_norm, "Default TV norm.\n" "\t\tSupported: PAL, PAL-M, PAL-N, PAL-Nc, NTSC-M, NTSC-J,\n" "\t\t\thd480i, hd480p, hd576i, hd576p, hd720p, hd1080i.\n" diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 6b9690418bc7..5445cefdd03e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -678,6 +678,8 @@ extern int nouveau_reg_debug; extern char *nouveau_vbios; extern int nouveau_ctxfw; extern int nouveau_ignorelid; +extern int nouveau_nofbaccel; +extern int nouveau_noaccel; /* nouveau_state.c */ extern void nouveau_preclose(struct drm_device *dev, struct drm_file *); diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 0b05c869e0e7..eddadaccc285 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -267,8 +267,12 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width, 
dev_priv->fbdev_info = info; strcpy(info->fix.id, "nouveaufb"); - info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA | - FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_IMAGEBLIT; + if (nouveau_nofbaccel) + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_DISABLED; + else + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA | + FBINFO_HWACCEL_FILLRECT | + FBINFO_HWACCEL_IMAGEBLIT; info->fbops = &nouveau_fbcon_ops; info->fix.smem_start = dev->mode_config.fb_base + nvbo->bo.offset - dev_priv->vm_vram_base; @@ -316,7 +320,7 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width, par->nouveau_fb = nouveau_fb; par->dev = dev; - if (dev_priv->channel) { + if (dev_priv->channel && !nouveau_nofbaccel) { switch (dev_priv->card_type) { case NV_50: nv50_fbcon_accel_init(info); diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index f2d0187ba152..241e24d60eb4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -427,15 +427,19 @@ nouveau_card_init(struct drm_device *dev) if (ret) goto out_timer; - /* PGRAPH */ - ret = engine->graph.init(dev); - if (ret) - goto out_fb; + if (nouveau_noaccel) + engine->graph.accel_blocked = true; + else { + /* PGRAPH */ + ret = engine->graph.init(dev); + if (ret) + goto out_fb; - /* PFIFO */ - ret = engine->fifo.init(dev); - if (ret) - goto out_graph; + /* PFIFO */ + ret = engine->fifo.init(dev); + if (ret) + goto out_graph; + } /* this call irq_preinstall, register irq handler and * call irq_postinstall @@ -479,9 +483,11 @@ nouveau_card_init(struct drm_device *dev) out_irq: drm_irq_uninstall(dev); out_fifo: - engine->fifo.takedown(dev); + if (!nouveau_noaccel) + engine->fifo.takedown(dev); out_graph: - engine->graph.takedown(dev); + if (!nouveau_noaccel) + engine->graph.takedown(dev); out_fb: engine->fb.takedown(dev); out_timer: @@ -518,8 +524,10 @@ static void nouveau_card_takedown(struct drm_device *dev) dev_priv->channel = NULL; } - 
engine->fifo.takedown(dev); - engine->graph.takedown(dev); + if (!nouveau_noaccel) { + engine->fifo.takedown(dev); + engine->graph.takedown(dev); + } engine->fb.takedown(dev); engine->timer.takedown(dev); engine->mc.takedown(dev); From 69c9700b544e496dc3ccf472a4f3a76dcf4abaf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Tue, 26 Jan 2010 18:39:20 +0000 Subject: [PATCH 356/640] drm/nouveau: Add getparam to get available PGRAPH units. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On nv50, this will be needed by applications using CUDA to know how much stack/local memory to allocate. Signed-off-by: Marcin Kościelnicki Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_reg.h | 1 + drivers/gpu/drm/nouveau/nouveau_state.c | 9 +++++++++ include/drm/nouveau_drm.h | 1 + 3 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index 251f1b3b38b9..aa9b310e41be 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -99,6 +99,7 @@ * the card will hang early on in the X init process. */ # define NV_PMC_ENABLE_UNK13 (1<<13) +#define NV40_PMC_GRAPH_UNITS 0x00001540 #define NV40_PMC_BACKLIGHT 0x000015f0 # define NV40_PMC_BACKLIGHT_MASK 0x001f0000 #define NV40_PMC_1700 0x00001700 diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 241e24d60eb4..fcd7610817a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -825,6 +825,15 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data, case NOUVEAU_GETPARAM_VM_VRAM_BASE: getparam->value = dev_priv->vm_vram_base; break; + case NOUVEAU_GETPARAM_GRAPH_UNITS: + /* NV40 and NV50 versions are quite different, but register + * address is the same. User is supposed to know the card + * family anyway... 
*/ + if (dev_priv->chipset >= 0x40) { + getparam->value = nv_rd32(dev, NV40_PMC_GRAPH_UNITS); + break; + } + /* FALLTHRU */ default: NV_ERROR(dev, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index 1e67c441ea82..f745948b61e4 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -77,6 +77,7 @@ struct drm_nouveau_gpuobj_free { #define NOUVEAU_GETPARAM_PCI_PHYSICAL 10 #define NOUVEAU_GETPARAM_CHIPSET_ID 11 #define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 +#define NOUVEAU_GETPARAM_GRAPH_UNITS 13 struct drm_nouveau_getparam { uint64_t param; uint64_t value; From 139295b671ff4ccd904f2fa58e9dbc0fe99cc7fe Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 30 Jan 2010 18:28:00 +0100 Subject: [PATCH 357/640] drm/nouveau: Fixup semaphores on pre-nv50 cards. Apparently, they generate a PFIFO interrupt each time one of the semaphore methods is executed if its ctxdma wasn't manually marked as valid. This patch makes it flip the valid bit in response to the DMA_SEMAPHORE method (which triggers the IRQ even for a valid ctxdma). 
Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_irq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c index 3b9bad66162a..baa9b3e0b66b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_irq.c +++ b/drivers/gpu/drm/nouveau/nouveau_irq.c @@ -211,6 +211,20 @@ nouveau_fifo_irq_handler(struct drm_device *dev) get + 4); } + if (status & NV_PFIFO_INTR_SEMAPHORE) { + uint32_t sem; + + status &= ~NV_PFIFO_INTR_SEMAPHORE; + nv_wr32(dev, NV03_PFIFO_INTR_0, + NV_PFIFO_INTR_SEMAPHORE); + + sem = nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE); + nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1); + + nv_wr32(dev, NV03_PFIFO_CACHE1_GET, get + 4); + nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1); + } + if (status) { NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n", status, chid); From f0fbe3eb5f65fe5948219f4ceac68f8a665b1fc6 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 30 Jan 2010 23:21:38 +0100 Subject: [PATCH 358/640] drm/nouveau: call ttm_bo_wait with the bo lock held to prevent hang nouveau_gem_ioctl_cpu_prep calls ttm_bo_wait without the bo lock held. ttm_bo_wait unlocks that lock, and so must be called with it held. Currently this bug causes libdrm nouveau_bo_busy() to hang the machine. 
Signed-off-by: Luca Barbieri Acked-by: Maarten Maathuis Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 6ac804b0c9f9..70cc30803e3b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -925,7 +925,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, } if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) { + spin_lock(&nvbo->bo.lock); ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait); + spin_unlock(&nvbo->bo.lock); } else { ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait); if (ret == 0) From 7dad9ef6d9255b4d2d0a26305a785a55f3ba55e3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Feb 2010 14:40:30 -0800 Subject: [PATCH 359/640] drivers/gpu/drm/nouveau/nouveau_grctx.c: correct NULL test Test the just-allocated value for NULL rather than some other value. 
The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,y; statement S; @@ x = \(kmalloc\|kcalloc\|kzalloc\)(...); ( if ((x) == NULL) S | if ( - y + x == NULL) S ) // Signed-off-by: Julia Lawall Cc: David Airlie Cc: Ben Skeggs Signed-off-by: Andrew Morton Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_grctx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.c b/drivers/gpu/drm/nouveau/nouveau_grctx.c index 419f4c2b3b89..c7ebec696747 100644 --- a/drivers/gpu/drm/nouveau/nouveau_grctx.c +++ b/drivers/gpu/drm/nouveau/nouveau_grctx.c @@ -97,8 +97,8 @@ nouveau_grctx_prog_load(struct drm_device *dev) } pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL); - if (!pgraph->ctxprog) { - NV_ERROR(dev, "OOM copying ctxprog\n"); + if (!pgraph->ctxvals) { + NV_ERROR(dev, "OOM copying ctxvals\n"); release_firmware(fw); nouveau_grctx_fini(dev); return -ENOMEM; From 126b5440565a1fa0cb49fd30041525d5a9a848f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Wed, 27 Jan 2010 14:03:18 +0000 Subject: [PATCH 360/640] drm/nouveau: Fix fbcon on mixed pre-NV50 + NV50 multicard. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used single shared fbops struct and patched it at fb init time with pointers to the right variant. On mixed multicard, this meant that it was either sending NV50-style commands to all cards, or NV04-style commands to all cards. 
Signed-off-by: Marcin Kościelnicki Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 30 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_fbcon.h | 6 +++++ drivers/gpu/drm/nouveau/nv04_fbcon.c | 9 +++----- drivers/gpu/drm/nouveau/nv50_fbcon.c | 9 +++----- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index eddadaccc285..ea879a2efef3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -107,6 +107,34 @@ static struct fb_ops nouveau_fbcon_ops = { .fb_setcmap = drm_fb_helper_setcmap, }; +static struct fb_ops nv04_fbcon_ops = { + .owner = THIS_MODULE, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, + .fb_fillrect = nv04_fbcon_fillrect, + .fb_copyarea = nv04_fbcon_copyarea, + .fb_imageblit = nv04_fbcon_imageblit, + .fb_sync = nouveau_fbcon_sync, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, + .fb_setcmap = drm_fb_helper_setcmap, +}; + +static struct fb_ops nv50_fbcon_ops = { + .owner = THIS_MODULE, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, + .fb_fillrect = nv50_fbcon_fillrect, + .fb_copyarea = nv50_fbcon_copyarea, + .fb_imageblit = nv50_fbcon_imageblit, + .fb_sync = nouveau_fbcon_sync, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, + .fb_setcmap = drm_fb_helper_setcmap, +}; + static void nouveau_fbcon_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno) { @@ -324,9 +352,11 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width, switch (dev_priv->card_type) { case NV_50: nv50_fbcon_accel_init(info); + info->fbops = &nv50_fbcon_ops; break; default: nv04_fbcon_accel_init(info); + info->fbops = &nv04_fbcon_ops; break; }; } diff --git 
a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h index 462e0b87b4bd..f9c34e1a8c11 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h @@ -40,7 +40,13 @@ int nouveau_fbcon_remove(struct drm_device *dev, struct drm_framebuffer *fb); void nouveau_fbcon_restore(void); void nouveau_fbcon_zfill(struct drm_device *dev); +void nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); +void nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); +void nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); int nv04_fbcon_accel_init(struct fb_info *info); +void nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); +void nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); +void nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); int nv50_fbcon_accel_init(struct fb_info *info); void nouveau_fbcon_gpu_lockup(struct fb_info *info); diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index d910873c1368..fd01caabd5c3 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -27,7 +27,7 @@ #include "nouveau_dma.h" #include "nouveau_fbcon.h" -static void +void nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) { struct nouveau_fbcon_par *par = info->par; @@ -54,7 +54,7 @@ nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) FIRE_RING(chan); } -static void +void nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) { struct nouveau_fbcon_par *par = info->par; @@ -88,7 +88,7 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) FIRE_RING(chan); } -static void +void nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) { struct nouveau_fbcon_par *par = info->par; @@ -307,9 +307,6 @@ 
nv04_fbcon_accel_init(struct fb_info *info) FIRE_RING(chan); - info->fbops->fb_fillrect = nv04_fbcon_fillrect; - info->fbops->fb_copyarea = nv04_fbcon_copyarea; - info->fbops->fb_imageblit = nv04_fbcon_imageblit; return 0; } diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index e4f279ee61cf..0f57cdf7ccb2 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -3,7 +3,7 @@ #include "nouveau_dma.h" #include "nouveau_fbcon.h" -static void +void nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) { struct nouveau_fbcon_par *par = info->par; @@ -46,7 +46,7 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) FIRE_RING(chan); } -static void +void nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) { struct nouveau_fbcon_par *par = info->par; @@ -81,7 +81,7 @@ nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) FIRE_RING(chan); } -static void +void nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) { struct nouveau_fbcon_par *par = info->par; @@ -262,9 +262,6 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.smem_start - dev_priv->fb_phys + dev_priv->vm_vram_base); - info->fbops->fb_fillrect = nv50_fbcon_fillrect; - info->fbops->fb_copyarea = nv50_fbcon_copyarea; - info->fbops->fb_imageblit = nv50_fbcon_imageblit; return 0; } From 9967b9481d2387af4dbe5ceda7a209466ba004a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= Date: Mon, 8 Feb 2010 00:20:17 +0000 Subject: [PATCH 361/640] drm/nouveau: Add proper vgaarb support. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcin Kościelnicki Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index fcd7610817a1..a4851af5b05e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -310,6 +310,14 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) static unsigned int nouveau_vga_set_decode(void *priv, bool state) { + struct drm_device *dev = priv; + struct drm_nouveau_private *dev_priv = dev->dev_private; + + if (dev_priv->chipset >= 0x40) + nv_wr32(dev, 0x88054, state); + else + nv_wr32(dev, 0x1854, state); + if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; From e235c1f3e132a243a1f81b3d95c99ee199b4d3f3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 22 Jan 2010 13:17:28 +1000 Subject: [PATCH 362/640] drm/nv40: make INIT_COMPUTE_MEM a NOP, just like nv50 It appears we aren't required to do memory sizing ourselves on nv40 either. NV40 init tables read a strap from PEXTDEV_BOOT_0 into a CRTC register, and then later use that value to select a memory configuration (written to PFB_CFG0, just like INIT_COMPUTE_MEM on earlier cards) with INIT_IO_RESTRICT_PROG. 
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index d7f8d8b4a4b8..fb4793e65ff2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -1865,7 +1865,7 @@ init_compute_mem(struct nvbios *bios, uint16_t offset, struct init_exec *iexec) struct drm_nouveau_private *dev_priv = bios->dev->dev_private; - if (dev_priv->card_type >= NV_50) + if (dev_priv->card_type >= NV_40) return 1; /* From 1ee7698fc3ec3d7949fa55e5154c8f5de8f1c3d9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 9 Feb 2010 10:08:34 +1000 Subject: [PATCH 363/640] drm/nouveau: make dp auxch xfer len check for reads only Writes don't return a count, and adding the check broke native DP. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index dd4937224220..f954ad93e81f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -502,12 +502,12 @@ nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr, break; } - if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) { - ret = -EREMOTEIO; - goto out; - } - if (cmd & 1) { + if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) { + ret = -EREMOTEIO; + goto out; + } + for (i = 0; i < 4; i++) { data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i)); NV_DEBUG_KMS(dev, "rd %d: 0x%08x\n", i, data32[i]); From 9eb07c259207d048e3ee8be2a77b2a4680b1edd4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 9 Feb 2010 12:31:47 +1100 Subject: [PATCH 364/640] md: fix 'degraded' calculation when starting a reshape. This code was written long ago when it was not possible to reshape a degraded array. 
Now it is, so the current level of degraded-ness needs to be taken into account. Also newly added devices should only reduce degradedness if they are deemed to be in-sync. In particular, if you convert a RAID5 to a RAID6, and increase the number of devices at the same time, then the 5->6 conversion will make the array degraded so the current code will produce a wrong value for 'degraded' - "-1" to be precise. If the reshape runs to completion end_reshape will calculate a correct new value for 'degraded', but if a device fails during the reshape an incorrect decision might be made based on the incorrect value of "degraded". This patch is suitable for 2.6.32-stable and if they are still open, 2.6.31-stable and 2.6.30-stable as well. Cc: stable@kernel.org Reported-by: Michael Evans Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e84204eb12df..b5629c3e14fa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5464,11 +5464,11 @@ static int raid5_start_reshape(mddev_t *mddev) !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { char nm[20]; - if (rdev->raid_disk >= conf->previous_raid_disks) + if (rdev->raid_disk >= conf->previous_raid_disks) { set_bit(In_sync, &rdev->flags); - else + added_devices++; + } else rdev->recovery_offset = 0; - added_devices++; sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) @@ -5480,9 +5480,12 @@ static int raid5_start_reshape(mddev_t *mddev) break; } + /* When a reshape changes the number of devices, ->degraded + * is measured against the large of the pre and post number of + * devices.*/ if (mddev->delta_disks > 0) { spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) + mddev->degraded += (conf->raid_disks - conf->previous_raid_disks) - added_devices; 
spin_unlock_irqrestore(&conf->device_lock, flags); } From 2d171886b183982feb03446997c65e9e212b948e Mon Sep 17 00:00:00 2001 From: Divy Le Ray Date: Mon, 8 Feb 2010 22:37:24 -0800 Subject: [PATCH 365/640] cxgb3: fix GRO checksum check Verify the HW checksum state for frames handed to GRO processing. Signed-off-by: Divy Le Ray Signed-off-by: David S. Miller --- drivers/net/cxgb3/sge.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index bdbd14727e4b..318a018ca7c5 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -2079,6 +2079,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, struct sge_fl *fl, int len, int complete) { struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct port_info *pi = netdev_priv(qs->netdev); struct sk_buff *skb = NULL; struct cpl_rx_pkt *cpl; struct skb_frag_struct *rx_frag; @@ -2116,11 +2117,18 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, if (!nr_frags) { offset = 2 + sizeof(struct cpl_rx_pkt); - qs->lro_va = sd->pg_chunk.va + 2; - } - len -= offset; + cpl = qs->lro_va = sd->pg_chunk.va + 2; - prefetch(qs->lro_va); + if ((pi->rx_offload & T3_RX_CSUM) && + cpl->csum_valid && cpl->csum == htons(0xffff)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; + } else + skb->ip_summed = CHECKSUM_NONE; + } else + cpl = qs->lro_va; + + len -= offset; rx_frag += nr_frags; rx_frag->page = sd->pg_chunk.page; @@ -2136,12 +2144,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, return; skb_record_rx_queue(skb, qs - &adap->sge.qs[0]); - skb->ip_summed = CHECKSUM_UNNECESSARY; - cpl = qs->lro_va; if (unlikely(cpl->vlan_valid)) { - struct net_device *dev = qs->netdev; - struct port_info *pi = netdev_priv(dev); struct vlan_group *grp = pi->vlan_grp; if (likely(grp != NULL)) { From d4ae20b3799e0b6fa0d832a645a422da9f239868 Mon Sep 17 00:00:00 2001 From: 
Jan Luebbe Date: Mon, 8 Feb 2010 22:41:44 -0800 Subject: [PATCH 366/640] net/sched: Fix module name in Kconfig The action modules have been prefixed with 'act_', but the Kconfig description was not changed. Signed-off-by: Jan Luebbe Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 929218a47620..21f9c7678aa3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -433,7 +433,7 @@ config NET_ACT_POLICE module. To compile this code as a module, choose M here: the - module will be called police. + module will be called act_police. config NET_ACT_GACT tristate "Generic actions" @@ -443,7 +443,7 @@ config NET_ACT_GACT accepting packets. To compile this code as a module, choose M here: the - module will be called gact. + module will be called act_gact. config GACT_PROB bool "Probability support" @@ -459,7 +459,7 @@ config NET_ACT_MIRRED other devices. To compile this code as a module, choose M here: the - module will be called mirred. + module will be called act_mirred. config NET_ACT_IPT tristate "IPtables targets" @@ -469,7 +469,7 @@ config NET_ACT_IPT classification. To compile this code as a module, choose M here: the - module will be called ipt. + module will be called act_ipt. config NET_ACT_NAT tristate "Stateless NAT" @@ -479,7 +479,7 @@ config NET_ACT_NAT netfilter for NAT unless you know what you are doing. To compile this code as a module, choose M here: the - module will be called nat. + module will be called act_nat. config NET_ACT_PEDIT tristate "Packet Editing" @@ -488,7 +488,7 @@ config NET_ACT_PEDIT Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the - module will be called pedit. + module will be called act_pedit. config NET_ACT_SIMP tristate "Simple Example (Debug)" @@ -502,7 +502,7 @@ config NET_ACT_SIMP If unsure, say N. 
To compile this code as a module, choose M here: the - module will be called simple. + module will be called act_simple. config NET_ACT_SKBEDIT tristate "SKB Editing" @@ -513,7 +513,7 @@ config NET_ACT_SKBEDIT If unsure, say N. To compile this code as a module, choose M here: the - module will be called skbedit. + module will be called act_skbedit. config NET_CLS_IND bool "Incoming device classification" From 3af26f58d1920d904da87c3897d23070fe2266b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Feb 2010 22:42:40 -0800 Subject: [PATCH 367/640] MAINTAINERS: networking drivers - Add git net-next tree During the rc period, patches that are not bugfixes should be done using the net-next tree. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 03f38c18f323..602022d2c7a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3836,6 +3836,7 @@ NETWORKING DRIVERS L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git S: Odd Fixes F: drivers/net/ F: include/linux/if_* From bcf4d812e66ee95f762b38063d654fd1ff7156b0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 8 Feb 2010 22:44:18 -0800 Subject: [PATCH 368/640] drivers/net: Correct NULL test Test the value that was just allocated rather than the previously tested one. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ expression *x; expression e; identifier l; @@ if (x == NULL || ...) { ... when forall return ...; } ... when != goto l; when != x = e when != &x *x == NULL // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- drivers/net/ax88796.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ax88796.c b/drivers/net/ax88796.c index 62d9c9cc5671..1dd4403247ca 100644 --- a/drivers/net/ax88796.c +++ b/drivers/net/ax88796.c @@ -921,7 +921,7 @@ static int ax_probe(struct platform_device *pdev) size = (res->end - res->start) + 1; ax->mem2 = request_mem_region(res->start, size, pdev->name); - if (ax->mem == NULL) { + if (ax->mem2 == NULL) { dev_err(&pdev->dev, "cannot reserve registers\n"); ret = -ENXIO; goto exit_mem1; From 4c52228d1b83ef67d4fa381c2ade70122c9e3c34 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 9 Feb 2010 09:46:07 +0100 Subject: [PATCH 369/640] [S390] qdio: continue polling for buffer state ERROR Inbound traffic handling may hang if next buffer to check is in state ERROR, polling is stopped and the final check for further available inbound buffers disregards buffers in state ERROR. This patch includes state ERROR when checking availability of more inbound buffers. 
Cc: Jan Glauber Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 999fe80c4051..5781c5d10cff 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -531,7 +531,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q) qdio_siga_sync_q(q); get_buf_state(q, q->first_to_check, &state, 0); - if (state == SLSB_P_INPUT_PRIMED) + if (state == SLSB_P_INPUT_PRIMED || state == SLSB_P_INPUT_ERROR) /* more work coming */ return 0; From 959153d34544b7237bad263e73a5abdf94fc7722 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Tue, 9 Feb 2010 09:46:08 +0100 Subject: [PATCH 370/640] [S390] qdio: prevent call trace if CHPID is offline If a CHPID is offline during a device shutdown the ccw_device_halt|clear may fail and the qdio device stays in state STOPPED until the shutdown is finished. If an interrupt occurs before the device is set to INACTIVE the STOPPED state triggers a WARN_ON in the interrupt handler. Prevent this WARN_ON by catching the STOPPED state in the interrupt handler. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 5781c5d10cff..62b654af9237 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -960,6 +960,8 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, qdio_handle_activate_check(cdev, intparm, cstat, dstat); break; + case QDIO_IRQ_STATE_STOPPED: + break; default: WARN_ON(1); } From 7717aefff3290c61e5f9e6aa39e9e1dc63cd4e81 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Feb 2010 09:46:09 +0100 Subject: [PATCH 371/640] [S390] Fix struct _lowcore layout. Offsets and sizes are wrong for 32 bit. 
Got broken with 866ba284 "[S390] cleanup lowcore.h". Reported-by: Mahesh Salgaonkar Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index f2ef4b619ce1..c25dfac7dd76 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -293,12 +293,12 @@ struct _lowcore __u64 clock_comparator; /* 0x02d0 */ __u32 machine_flags; /* 0x02d8 */ __u32 ftrace_func; /* 0x02dc */ - __u8 pad_0x02f0[0x0300-0x02f0]; /* 0x02f0 */ + __u8 pad_0x02e0[0x0300-0x02e0]; /* 0x02e0 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ - __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ /* * 0xe00 contains the address of the IPL Parameter Information From c93d89f3dbf0202bf19c07960ca8602b48c2f9a0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Jan 2010 19:13:40 +0800 Subject: [PATCH 372/640] Export the symbol of getboottime and monotonic_to_bootbased Export getboottime and monotonic_to_bootbased so that they can be used by the following patch. Cc: stable@kernel.org Signed-off-by: Jason Wang Signed-off-by: Marcelo Tosatti --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7faaa32fbf4f..e2ab064c6d41 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -880,6 +880,7 @@ void getboottime(struct timespec *ts) set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } +EXPORT_SYMBOL_GPL(getboottime); /** * monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts) { *ts = timespec_add_safe(*ts, total_sleep_time); } +EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { From 923de3cf5bf12049628019010e36623fca5ef6d1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Jan 2010 19:13:49 +0800 Subject: [PATCH 373/640] kvmclock: count total_sleep_time when updating guest clock Current kvm wallclock does not consider the total_sleep_time which could cause wrong wallclock in guest after host suspend/resume. This patch solve this issue by counting total_sleep_time to get the correct host boot time. Cc: stable@kernel.org Signed-off-by: Jason Wang Acked-by: Glauber Costa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ddcad452add..a1e1bc9d412d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; struct pvclock_wall_clock wc; - struct timespec now, sys, boot; + struct timespec boot; if (!wall_clock) return; @@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. 
*/ - now = current_kernel_time(); - ktime_get_ts(&sys); - boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + getboottime(&boot); wc.sec = boot.tv_sec; wc.nsec = boot.tv_nsec; @@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); ktime_get_ts(&ts); + monotonic_to_bootbased(&ts); local_irq_restore(flags); /* With all the info we got, fill in the values */ From ee73f656a604d5aa9df86a97102e4e462dd79924 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 29 Jan 2010 17:28:41 -0200 Subject: [PATCH 374/640] KVM: PIT: control word is write-only PIT control word (address 0x43) is write-only, reads are undefined. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 296aba49472a..15578f180e59 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this, return -EOPNOTSUPP; addr &= KVM_PIT_CHANNEL_MASK; + if (addr == 3) + return 0; + s = &pit_state->channels[addr]; mutex_lock(&pit_state->lock); From 6c8afef551fef87a3bf24f8a74c69a7f2f72fc82 Mon Sep 17 00:00:00 2001 From: Sujith Date: Tue, 9 Feb 2010 10:07:00 +0530 Subject: [PATCH 375/640] ath9k: Fix sequence numbers for PAE frames Currently, PAE frames are not assigned proper sequence numbers. Since sending PAE frames as part of aggregates breaks crypto with several APs, they are sent as normal MPDUs. Fix the sequence number issue by updating the frame with the internal sequence number. Tested-by: Felix Fietkau Signed-off-by: Sujith Cc: stable@kernel.org Signed-off-by: John W.
Linville --- drivers/net/wireless/ath/ath9k/xmit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index fa12b9060b0b..29bf33692f71 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1615,7 +1615,7 @@ static int ath_tx_setup_buffer(struct ieee80211_hw *hw, struct ath_buf *bf, bf->bf_frmlen -= padsize; } - if (conf_is_ht(&hw->conf) && !is_pae(skb)) + if (conf_is_ht(&hw->conf)) bf->bf_state.bf_type |= BUF_HT; bf->bf_flags = setup_tx_flags(sc, skb, txctl->txq); @@ -1701,7 +1701,7 @@ static void ath_tx_start_dma(struct ath_softc *sc, struct ath_buf *bf, goto tx_done; } - if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) { + if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && !is_pae(skb)) { /* * Try aggregation if it's a unicast data frame * and the destination is HT capable. From 33a5d083e786f0c3fb4efedb59b0e8e3de39963b Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 9 Feb 2010 12:07:41 +0100 Subject: [PATCH 376/640] iwmc3200wifi: Test of wrong pointer after kzalloc in iwm_mlme_update_bss_table() The wrong pointer was tested. Signed-off-by: Roel Kluin Acked-by: Samuel Ortiz Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 3db3d8b07491..64d16fe37f9a 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -794,7 +794,7 @@ static int iwm_mlme_update_bss_table(struct iwm_priv *iwm, u8 *buf, } bss->bss = kzalloc(bss_len, GFP_KERNEL); - if (!bss) { + if (!bss->bss) { kfree(bss); IWM_ERR(iwm, "Couldn't allocate bss\n"); return -ENOMEM; From 2c1740098c708b465e87637b237feb2fd98f129a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:27 -0500 Subject: [PATCH 377/640] NFS: Fix a bug in nfs_fscache_release_page() Not having an fscache cookie is perfectly valid if the user didn't mount with the fscache option. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=15234 Signed-off-by: Trond Myklebust Acked-by: David Howells Cc: stable@kernel.org --- fs/nfs/fscache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index fa588006588d..237874f1af23 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) */ int nfs_fscache_release_page(struct page *page, gfp_t gfp) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; - - BUG_ON(!cookie); - if (PageFsCache(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); From 7549ad5f9b6eda49bbac4b14c5b8f37bf464f922 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:34 -0500 Subject: [PATCH 378/640] NFS: Remove a redundant check for PageFsCache in nfs_migrate_page() Signed-off-by: Trond Myklebust Acked-by: David 
Howells --- fs/nfs/write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b54b8bb101f..d63d964a0392 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1598,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct nfs_page *req; int ret; - if (PageFsCache(page)) - nfs_fscache_release_page(page, GFP_KERNEL); + nfs_fscache_release_page(page, GFP_KERNEL); req = nfs_find_and_lock_request(page); ret = PTR_ERR(req); From fdcb45777a3d1689c5541e1f85ee3ebbd197d2c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:40 -0500 Subject: [PATCH 379/640] NFS: Fix the mapping of the NFSERR_SERVERFAULT error It was recently pointed out that the NFSERR_SERVERFAULT error, which is designed to inform the user of a serious internal error on the server, was being mapped to an error value that is internal to the kernel. This patch maps it to the error EREMOTEIO, which is exported to userland through errno.h. 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/mount_clnt.c | 2 +- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs4xdr.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0adefc40cc89..59047f8d7d72 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -120,7 +120,7 @@ static struct { { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, - { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, }, }; struct mountres { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5e078b222b4e..7bc2da8efd4a 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -699,7 +699,7 @@ static struct { { NFSERR_BAD_COOKIE, -EBADCOOKIE }, { NFSERR_NOTSUPP, -ENOTSUPP }, { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -ESERVERFAULT }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, { NFSERR_BADTYPE, -EBADTYPE }, { NFSERR_JUKEBOX, -EJUKEBOX }, { -1, -EIO } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e437fd6a819f..5cd5184b56db 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr, * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. */ - status = -ESERVERFAULT; + status = -EREMOTEIO; if (memcmp(id.data, res->sr_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5774,7 +5774,7 @@ static struct { { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, { NFS4ERR_BADTYPE, -EBADTYPE }, { NFS4ERR_LOCKED, -EAGAIN }, { NFS4ERR_SYMLINK, -ELOOP }, @@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat) } if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. 
*/ - return -ESERVERFAULT; + return -EREMOTEIO; } /* If we cannot translate the error, the recovery routines should * handle it. From fed08d036f2aabd8d0c684439de37f8ebec2bbc2 Mon Sep 17 00:00:00 2001 From: Jody Bruchon Date: Sat, 6 Feb 2010 10:46:26 -0500 Subject: [PATCH 380/640] ALSA: hda-intel: Avoid divide by zero crash On my AMD780V chipset, hda_intel.c can crash the kernel with a divide by zero for as-yet unknown reasons. A simple check for zero prevents it, though the problem that causes it remains. Since the workaround is harmless and won't affect anyone except victims of this bug, it should be safe; moreover, because this crash can be triggered by a user-mode application, there are denial of service implications on the systems affected by the bug without the patch. Signed-off-by: Jody Bruchon Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b8faa6dc5abe..e767c3f395ab 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1893,6 +1893,12 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) if (!bdl_pos_adj[chip->dev_index]) return 1; /* no delayed ack */ + if (azx_dev->period_bytes == 0) { + printk(KERN_WARNING + "hda-intel: Divide by zero was avoided " + "in azx_dev->period_bytes.\n"); + return 0; + } if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2) return 0; /* NG - it's below the period boundary */ return 1; /* OK, it's fine */ From 39c9bfb453b748ce220ceefacbe2a5c19fabf67b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 9 Feb 2010 10:22:29 +1000 Subject: [PATCH 381/640] drm/nv50: prevent multiple init tables being parsed at the same time With DVI and DP plugged, the DVI clock change interrupts being run can cause DP link training to fail. This adds a spinlock around init table parsing to prevent this. 
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bios.c | 17 +++++++++-------- drivers/gpu/drm/nouveau/nouveau_bios.h | 2 ++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index fb4793e65ff2..2cd0fad17dac 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -3765,7 +3765,6 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, */ struct drm_nouveau_private *dev_priv = dev->dev_private; - struct init_exec iexec = {true, false}; struct nvbios *bios = &dev_priv->VBIOS; uint8_t *table = &bios->data[bios->display.script_table_ptr]; uint8_t *otable = NULL; @@ -3845,8 +3844,6 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } } - bios->display.output = dcbent; - if (pxclk == 0) { script = ROM16(otable[6]); if (!script) { @@ -3855,7 +3852,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } NV_TRACE(dev, "0x%04X: parsing output script 0\n", script); - parse_init_table(bios, script, &iexec); + nouveau_bios_run_init_table(dev, script, dcbent); } else if (pxclk == -1) { script = ROM16(otable[8]); @@ -3865,7 +3862,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } NV_TRACE(dev, "0x%04X: parsing output script 1\n", script); - parse_init_table(bios, script, &iexec); + nouveau_bios_run_init_table(dev, script, dcbent); } else if (pxclk == -2) { if (table[4] >= 12) @@ -3878,7 +3875,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } NV_TRACE(dev, "0x%04X: parsing output script 2\n", script); - parse_init_table(bios, script, &iexec); + nouveau_bios_run_init_table(dev, script, dcbent); } else if (pxclk > 0) { script = ROM16(otable[table[4] + i*6 + 2]); @@ -3890,7 +3887,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } 
NV_TRACE(dev, "0x%04X: parsing clock script 0\n", script); - parse_init_table(bios, script, &iexec); + nouveau_bios_run_init_table(dev, script, dcbent); } else if (pxclk < 0) { script = ROM16(otable[table[4] + i*6 + 4]); @@ -3902,7 +3899,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent, } NV_TRACE(dev, "0x%04X: parsing clock script 1\n", script); - parse_init_table(bios, script, &iexec); + nouveau_bios_run_init_table(dev, script, dcbent); } return 0; @@ -5864,10 +5861,13 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table, struct drm_nouveau_private *dev_priv = dev->dev_private; struct nvbios *bios = &dev_priv->VBIOS; struct init_exec iexec = { true, false }; + unsigned long flags; + spin_lock_irqsave(&bios->lock, flags); bios->display.output = dcbent; parse_init_table(bios, table, &iexec); bios->display.output = NULL; + spin_unlock_irqrestore(&bios->lock, flags); } static bool NVInitVBIOS(struct drm_device *dev) @@ -5876,6 +5876,7 @@ static bool NVInitVBIOS(struct drm_device *dev) struct nvbios *bios = &dev_priv->VBIOS; memset(bios, 0, sizeof(struct nvbios)); + spin_lock_init(&bios->lock); bios->dev = dev; if (!NVShadowVBIOS(dev, bios->data)) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h index 058e98c76d89..68446fd4146b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.h +++ b/drivers/gpu/drm/nouveau/nouveau_bios.h @@ -205,6 +205,8 @@ struct nvbios { struct drm_device *dev; struct nouveau_bios_info pub; + spinlock_t lock; + uint8_t data[NV_PROM_SIZE]; unsigned int length; bool execute; From 5025b43120b629bdf11087a3c652dc9cbe172191 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 9 Feb 2010 12:30:35 +1000 Subject: [PATCH 382/640] drm/nv50: disregard dac outputs in nv50_sor_dpms() Fixes DVI+VGA on my 9400, and likely a lot of other configurations that got broken by the previous DVI-over-DP fix.
Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_sor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c index ecf1936b8224..c2fff543b06f 100644 --- a/drivers/gpu/drm/nouveau/nv50_sor.c +++ b/drivers/gpu/drm/nouveau/nv50_sor.c @@ -101,6 +101,7 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode) struct nouveau_encoder *nvenc = nouveau_encoder(enc); if (nvenc == nv_encoder || + nvenc->disconnect != nv50_sor_disconnect || nvenc->dcb->or != nv_encoder->dcb->or) continue; From eb1dba0ebaa5b7642b323fac148f9947522a48a8 Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Sun, 27 Dec 2009 12:22:07 +0100 Subject: [PATCH 383/640] drm/nv50: align size of buffer object to the right boundaries. - In the current situation the padding that is added is dangerous to write to, userspace could potentially overwrite parts of another bo. - Depth and stencil buffers are supposed to be large enough in general so the waste of memory should be acceptable. - Alternatives are hiding the padding from users or splitting vram into 2 zones. Signed-off-by: Maarten Maathuis Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index db0ed4c13f98..028719fddf76 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -65,8 +65,10 @@ nouveau_bo_fixup_align(struct drm_device *dev, /* * Some of the tile_flags have a periodic structure of N*4096 bytes, - * align to to that as well as the page size. Overallocate memory to - * avoid corruption of other buffer objects. + * align to to that as well as the page size. Align the size to the + * appropriate boundaries. This does imply that sizes are rounded up + * 3-7 pages, so be aware of this and do not waste memory by allocating + * many small buffers. 
*/ if (dev_priv->card_type == NV_50) { uint32_t block_size = nouveau_mem_fb_amount(dev) >> 15; @@ -77,22 +79,20 @@ nouveau_bo_fixup_align(struct drm_device *dev, case 0x2800: case 0x4800: case 0x7a00: - *size = roundup(*size, block_size); if (is_power_of_2(block_size)) { - *size += 3 * block_size; for (i = 1; i < 10; i++) { *align = 12 * i * block_size; if (!(*align % 65536)) break; } } else { - *size += 6 * block_size; for (i = 1; i < 10; i++) { *align = 8 * i * block_size; if (!(*align % 65536)) break; } } + *size = roundup(*size, *align); break; default: break; From a51a3bf50d41708388f51ce63d965c0e77726eab Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Mon, 1 Feb 2010 18:32:09 +0100 Subject: [PATCH 384/640] drm/nv50: avoid unloading pgraph context when ctxprog is running - We need to disable pgraph fifo access before checking the current channel, otherwise we could still hit a running ctxprog. - The writes to 0x400500 are already handled by pgraph->fifo_access and are therefore redundant, moreover pgraph fifo access should not be reenabled before current context is set as invalid. So remove them altogether. 
Signed-off-by: Maarten Maathuis Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_channel.c | 7 +++---- drivers/gpu/drm/nouveau/nv50_graph.c | 10 +++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 343d718a9667..2281f99da7fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -278,12 +278,11 @@ nouveau_channel_free(struct nouveau_channel *chan) /* Ensure the channel is no longer active on the GPU */ pfifo->reassign(dev, false); - if (pgraph->channel(dev) == chan) { - pgraph->fifo_access(dev, false); + pgraph->fifo_access(dev, false); + if (pgraph->channel(dev) == chan) pgraph->unload_context(dev); - pgraph->fifo_access(dev, true); - } pgraph->destroy_context(chan); + pgraph->fifo_access(dev, true); if (pfifo->channel_id(dev) == chan->id) { pfifo->disable(dev); diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c index 20319e59d368..6d504801b514 100644 --- a/drivers/gpu/drm/nouveau/nv50_graph.c +++ b/drivers/gpu/drm/nouveau/nv50_graph.c @@ -165,6 +165,12 @@ nv50_graph_channel(struct drm_device *dev) uint32_t inst; int i; + /* Be sure we're not in the middle of a context switch or bad things + * will happen, such as unloading the wrong pgraph context. 
+ */ + if (!nv_wait(0x400300, 0x00000001, 0x00000000)) + NV_ERROR(dev, "Ctxprog is still running\n"); + inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR); if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED)) return NULL; @@ -275,7 +281,7 @@ nv50_graph_load_context(struct nouveau_channel *chan) int nv50_graph_unload_context(struct drm_device *dev) { - uint32_t inst, fifo = nv_rd32(dev, 0x400500); + uint32_t inst; inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR); if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED)) @@ -283,12 +289,10 @@ nv50_graph_unload_context(struct drm_device *dev) inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE; nouveau_wait_for_idle(dev); - nv_wr32(dev, 0x400500, fifo & ~1); nv_wr32(dev, 0x400784, inst); nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20); nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 0x01); nouveau_wait_for_idle(dev); - nv_wr32(dev, 0x400500, fifo); nv_wr32(dev, NV50_PGRAPH_CTXCTL_CUR, inst); return 0; From a87ff62a80a6a65fc664cd410061910b8c52b896 Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Mon, 1 Feb 2010 18:47:52 +0100 Subject: [PATCH 385/640] drm/nv50: delete ramfc object after disabling fifo, not before ramfc is zero'ed upon destruction, so it's safer to do things in the right order. Signed-off-by: Maarten Maathuis Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_fifo.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c index 32b244bcb482..204a79ff10f4 100644 --- a/drivers/gpu/drm/nouveau/nv50_fifo.c +++ b/drivers/gpu/drm/nouveau/nv50_fifo.c @@ -317,17 +317,20 @@ void nv50_fifo_destroy_context(struct nouveau_channel *chan) { struct drm_device *dev = chan->dev; + struct nouveau_gpuobj_ref *ramfc = chan->ramfc; NV_DEBUG(dev, "ch%d\n", chan->id); - nouveau_gpuobj_ref_del(dev, &chan->ramfc); - nouveau_gpuobj_ref_del(dev, &chan->cache); - + /* This will ensure the channel is seen as disabled. 
*/ + chan->ramfc = NULL; nv50_fifo_channel_disable(dev, chan->id, false); /* Dummy channel, also used on ch 127 */ if (chan->id == 0) nv50_fifo_channel_disable(dev, 127, false); + + nouveau_gpuobj_ref_del(dev, &ramfc); + nouveau_gpuobj_ref_del(dev, &chan->cache); } int From b1d37aa0aa43c5bf857364093ab2191acd37f2ec Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Wed, 20 Jan 2010 19:54:34 +0100 Subject: [PATCH 386/640] drm/nv50: make the pgraph irq handler loop like the pre-nv50 version Unset the bit that indicates that a ctxprog can continue at the end. Signed-off-by: Maarten Maathuis Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_irq.c | 133 ++++++++++++++------------ 1 file changed, 73 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c index baa9b3e0b66b..447f9f69d6b1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_irq.c +++ b/drivers/gpu/drm/nouveau/nouveau_irq.c @@ -580,86 +580,99 @@ nouveau_pgraph_irq_handler(struct drm_device *dev) static void nv50_pgraph_irq_handler(struct drm_device *dev) { - uint32_t status, nsource; + uint32_t status; - status = nv_rd32(dev, NV03_PGRAPH_INTR); - nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE); + while ((status = nv_rd32(dev, NV03_PGRAPH_INTR))) { + uint32_t nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE); - if (status & 0x00000001) { - nouveau_pgraph_intr_notify(dev, nsource); - status &= ~0x00000001; - nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000001); - } + if (status & 0x00000001) { + nouveau_pgraph_intr_notify(dev, nsource); + status &= ~0x00000001; + nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000001); + } - if (status & 0x00000010) { - nouveau_pgraph_intr_error(dev, nsource | - NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD); + if (status & 0x00000010) { + nouveau_pgraph_intr_error(dev, nsource | + NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD); - status &= ~0x00000010; - nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000010); - } + status &= ~0x00000010; + nv_wr32(dev, NV03_PGRAPH_INTR, 
0x00000010); + } - if (status & 0x00001000) { - nv_wr32(dev, 0x400500, 0x00000000); - nv_wr32(dev, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH); - nv_wr32(dev, NV40_PGRAPH_INTR_EN, nv_rd32(dev, - NV40_PGRAPH_INTR_EN) & ~NV_PGRAPH_INTR_CONTEXT_SWITCH); - nv_wr32(dev, 0x400500, 0x00010001); + if (status & 0x00001000) { + nv_wr32(dev, 0x400500, 0x00000000); + nv_wr32(dev, NV03_PGRAPH_INTR, + NV_PGRAPH_INTR_CONTEXT_SWITCH); + nv_wr32(dev, NV40_PGRAPH_INTR_EN, nv_rd32(dev, + NV40_PGRAPH_INTR_EN) & + ~NV_PGRAPH_INTR_CONTEXT_SWITCH); + nv_wr32(dev, 0x400500, 0x00010001); - nv50_graph_context_switch(dev); + nv50_graph_context_switch(dev); - status &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; - } + status &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; + } - if (status & 0x00100000) { - nouveau_pgraph_intr_error(dev, nsource | - NV03_PGRAPH_NSOURCE_DATA_ERROR); + if (status & 0x00100000) { + nouveau_pgraph_intr_error(dev, nsource | + NV03_PGRAPH_NSOURCE_DATA_ERROR); - status &= ~0x00100000; - nv_wr32(dev, NV03_PGRAPH_INTR, 0x00100000); - } + status &= ~0x00100000; + nv_wr32(dev, NV03_PGRAPH_INTR, 0x00100000); + } - if (status & 0x00200000) { - int r; + if (status & 0x00200000) { + int r; - nouveau_pgraph_intr_error(dev, nsource | - NV03_PGRAPH_NSOURCE_PROTECTION_ERROR); + nouveau_pgraph_intr_error(dev, nsource | + NV03_PGRAPH_NSOURCE_PROTECTION_ERROR); - NV_ERROR(dev, "magic set 1:\n"); - for (r = 0x408900; r <= 0x408910; r += 4) - NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r)); - nv_wr32(dev, 0x408900, nv_rd32(dev, 0x408904) | 0xc0000000); - for (r = 0x408e08; r <= 0x408e24; r += 4) - NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r)); - nv_wr32(dev, 0x408e08, nv_rd32(dev, 0x408e08) | 0xc0000000); + NV_ERROR(dev, "magic set 1:\n"); + for (r = 0x408900; r <= 0x408910; r += 4) + NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, + nv_rd32(dev, r)); + nv_wr32(dev, 0x408900, + nv_rd32(dev, 0x408904) | 0xc0000000); + for (r = 0x408e08; r <= 0x408e24; r += 4) + NV_ERROR(dev, "\t0x%08x: 
0x%08x\n", r, + nv_rd32(dev, r)); + nv_wr32(dev, 0x408e08, + nv_rd32(dev, 0x408e08) | 0xc0000000); - NV_ERROR(dev, "magic set 2:\n"); - for (r = 0x409900; r <= 0x409910; r += 4) - NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r)); - nv_wr32(dev, 0x409900, nv_rd32(dev, 0x409904) | 0xc0000000); - for (r = 0x409e08; r <= 0x409e24; r += 4) - NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r)); - nv_wr32(dev, 0x409e08, nv_rd32(dev, 0x409e08) | 0xc0000000); + NV_ERROR(dev, "magic set 2:\n"); + for (r = 0x409900; r <= 0x409910; r += 4) + NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, + nv_rd32(dev, r)); + nv_wr32(dev, 0x409900, + nv_rd32(dev, 0x409904) | 0xc0000000); + for (r = 0x409e08; r <= 0x409e24; r += 4) + NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, + nv_rd32(dev, r)); + nv_wr32(dev, 0x409e08, + nv_rd32(dev, 0x409e08) | 0xc0000000); - status &= ~0x00200000; - nv_wr32(dev, NV03_PGRAPH_NSOURCE, nsource); - nv_wr32(dev, NV03_PGRAPH_INTR, 0x00200000); - } + status &= ~0x00200000; + nv_wr32(dev, NV03_PGRAPH_NSOURCE, nsource); + nv_wr32(dev, NV03_PGRAPH_INTR, 0x00200000); + } - if (status) { - NV_INFO(dev, "Unhandled PGRAPH_INTR - 0x%08x\n", status); - nv_wr32(dev, NV03_PGRAPH_INTR, status); - } + if (status) { + NV_INFO(dev, "Unhandled PGRAPH_INTR - 0x%08x\n", + status); + nv_wr32(dev, NV03_PGRAPH_INTR, status); + } - { - const int isb = (1 << 16) | (1 << 0); + { + const int isb = (1 << 16) | (1 << 0); - if ((nv_rd32(dev, 0x400500) & isb) != isb) - nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | isb); - nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31)); + if ((nv_rd32(dev, 0x400500) & isb) != isb) + nv_wr32(dev, 0x400500, + nv_rd32(dev, 0x400500) | isb); + } } nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING); + nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31)); } static void From cf9db6c41f739a294286847aab1e85f39aef1781 Mon Sep 17 00:00:00 2001 From: "Serge E. 
Hallyn" Date: Mon, 8 Feb 2010 20:35:02 -0600 Subject: [PATCH 387/640] x86-32: Make AT_VECTOR_SIZE_ARCH=2 Both x86-32 and x86-64 with 32-bit compat use ARCH_DLINFO_IA32, which defines two saved_auxv entries. But system.h only defines AT_VECTOR_SIZE_ARCH as 2 for CONFIG_IA32_EMULATION, not for CONFIG_X86_32. Fix that. Signed-off-by: Serge E. Hallyn LKML-Reference: <20100209023502.GA15408@us.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/system.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index ecb544e65382..e04740f7a0bb 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -11,9 +11,9 @@ #include /* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) # define AT_VECTOR_SIZE_ARCH 2 -#else +#else /* else it's non-compat x86-64 */ # define AT_VECTOR_SIZE_ARCH 1 #endif From f036d9f3985a529a81e582f68aa984eb7b20d54d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Feb 2010 16:18:40 -0800 Subject: [PATCH 388/640] sparc: Align clone and signal stacks to 16 bytes. This is mandatory for 64-bit processes, and doing it also for 32-bit processes saves a conditional in the compat case. This fixes the glibc/nptl/tst-stdio1 test case, as well as many others, on 64-bit. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 8 ++++---- arch/sparc/kernel/signal32.c | 10 ++++++---- arch/sparc/kernel/signal_32.c | 6 ++++-- arch/sparc/kernel/signal_64.c | 8 +++++--- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 2830b415e214..f23c8fda6503 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -526,7 +526,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, * Set some valid stack frames to give to the child. */ childstack = (struct sparc_stackf __user *) - (sp & ~0x7UL); + (sp & ~0xfUL); parentstack = (struct sparc_stackf __user *) regs->u_regs[UREG_FP]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 18d67854a1b8..6679eebfd2e8 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -406,11 +406,11 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) } else __get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6])); - /* Now 8-byte align the stack as this is mandatory in the - * Sparc ABI due to how register windows work. This hides - * the restriction from thread libraries etc. -DaveM + /* Now align the stack as this is mandatory in the Sparc ABI + * due to how register windows work. This hides the + * restriction from thread libraries etc. 
*/ - csp &= ~7UL; + csp &= ~15UL; distance = fp - psp; rval = (csp - distance); diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index ba5b09ad6666..ea22cd373c64 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -120,8 +120,8 @@ struct rt_signal_frame32 { }; /* Align macros */ -#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 7) & (~7))) -#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 7) & (~7))) +#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 15) & (~15))) +#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 15) & (~15))) int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { @@ -420,15 +420,17 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns sp = current->sas_ss_sp + current->sas_ss_size; } + sp -= framesize; + /* Always align the stack frame. This handles two cases. First, * sigaltstack need not be mindful of platform specific stack * alignment. Second, if we took this signal because the stack * is not aligned properly, we'd like to take the signal cleanly * and report that. */ - sp &= ~7UL; + sp &= ~15UL; - return (void __user *)(sp - framesize); + return (void __user *) sp; } static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 7ce1a1005b1d..9882df92ba0a 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -267,15 +267,17 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re sp = current->sas_ss_sp + current->sas_ss_size; } + sp -= framesize; + /* Always align the stack frame. This handles two cases. First, * sigaltstack need not be mindful of platform specific stack * alignment. Second, if we took this signal because the stack * is not aligned properly, we'd like to take the signal cleanly * and report that. 
*/ - sp &= ~7UL; + sp &= ~15UL; - return (void __user *)(sp - framesize); + return (void __user *) sp; } static inline int diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 647afbda7ae1..9fa48c30037e 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -353,7 +353,7 @@ segv: /* Checks if the fp is valid */ static int invalid_frame_pointer(void __user *fp, int fplen) { - if (((unsigned long) fp) & 7) + if (((unsigned long) fp) & 15) return 1; return 0; } @@ -396,15 +396,17 @@ static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs * sp = current->sas_ss_sp + current->sas_ss_size; } + sp -= framesize; + /* Always align the stack frame. This handles two cases. First, * sigaltstack need not be mindful of platform specific stack * alignment. Second, if we took this signal because the stack * is not aligned properly, we'd like to take the signal cleanly * and report that. */ - sp &= ~7UL; + sp &= ~15UL; - return (void __user *)(sp - framesize); + return (void __user *) sp; } static inline void From ef286f6fa673cd7fb367e1b145069d8dbfcc6081 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 9 Feb 2010 16:34:14 +1100 Subject: [PATCH 389/640] md: fix some lockdep issues between md and sysfs. ====== This fix is related to http://bugzilla.kernel.org/show_bug.cgi?id=15142 but does not address that exact issue. ====== sysfs does not like attributes being removed while they are being accessed (i.e. read or written) and waits for the access to complete. As accessing some md attributes takes the same lock that is held while removing those attributes a deadlock can occur. This patch addresses 3 issues in md that could lead to this deadlock. Two relate to calling flush_scheduled_work while the lock is held. This is probably a bad idea in general and as we use schedule_work to delete various sysfs objects it is particularly bad. 
In one case flush_scheduled_work is called from md_alloc (called by md_probe) called from do_md_run which holds the lock. This call is only present to ensure that ->gendisk is set. However we can be sure that gendisk is always set (though possibly we couldn't when that code was originally written). This is because do_md_run is called in three different contexts: 1/ from md_ioctl. This requires that md_open has succeeded, and it fails if ->gendisk is not set. 2/ from writing a sysfs attribute. This can only happen if the mddev has been registered in sysfs which happens in md_alloc after ->gendisk has been set. 3/ from autorun_array which is only called by autorun_devices, which checks for ->gendisk to be set before calling autorun_array. So the call to md_probe in do_md_run can be removed, and the check on ->gendisk can also go. In the other case flush_scheduled_work is being called in do_md_stop, purportedly to wait for all md_delayed_delete calls (which delete the component rdevs) to complete. However there really isn't any need to wait for them - they have already been disconnected in all important ways. The third issue is that raid5->stop() removes some attribute names while the lock is held. There is already some infrastructure in place to delay attribute removal until after the lock is released (using schedule_work). So extend that infrastructure to remove the raid5_attrs_group. This does not address all lockdep issues related to the sysfs "s_active" lock. The rest can be addressed by splitting that lockdep context between symlinks and non-symlinks which hopefully will happen. 
Signed-off-by: NeilBrown --- drivers/md/md.c | 14 +++++--------- drivers/md/raid5.c | 3 +-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index dd3dfe42d5a9..a20a71e5efd3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4075,8 +4075,10 @@ static void mddev_delayed_delete(struct work_struct *ws) { mddev_t *mddev = container_of(ws, mddev_t, del_work); - if (mddev->private == &md_redundancy_group) { + if (mddev->private) { sysfs_remove_group(&mddev->kobj, &md_redundancy_group); + if (mddev->private != (void*)1) + sysfs_remove_group(&mddev->kobj, mddev->private); if (mddev->sysfs_action) sysfs_put(mddev->sysfs_action); mddev->sysfs_action = NULL; @@ -4287,10 +4289,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify_dirent(rdev->sysfs_state); } - md_probe(mddev->unit, NULL, NULL); disk = mddev->gendisk; - if (!disk) - return -ENOMEM; spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -4530,8 +4529,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->queue->unplug_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL; module_put(mddev->pers->owner); - if (mddev->pers->sync_request) - mddev->private = &md_redundancy_group; + if (mddev->pers->sync_request && mddev->private == NULL) + mddev->private = (void*)1; mddev->pers = NULL; /* tell userspace to handle 'inactive' */ sysfs_notify_dirent(mddev->sysfs_state); @@ -4578,9 +4577,6 @@ out: } mddev->bitmap_info.offset = 0; - /* make sure all md_delayed_delete calls have finished */ - flush_scheduled_work(); - export_array(mddev); mddev->array_sectors = 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b5629c3e14fa..ceb24afdc147 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5136,9 +5136,8 @@ static int stop(mddev_t *mddev) mddev->thread = NULL; mddev->queue->backing_dev_info.congested_fn = NULL; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - 
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); free_conf(conf); - mddev->private = NULL; + mddev->private = &raid5_attrs_group; return 0; } From 77058e1adcc439151db41f2b84e4867a88113cd8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 8 Feb 2010 20:09:03 +0000 Subject: [PATCH 390/640] powerpc: Fix address masking bug in hpte_need_flush() Commit f71dc176aa06359681c30ba6877ffccab6fba3a6 'Make hpte_need_flush() correctly mask for multiple page sizes' introduced a bug, which is triggered when a kernel with a 64k base page size is run on a system whose hardware does not support 64k hash PTEs. In this case, we emulate 64k pages with multiple 4k hash PTEs, however in hpte_need_flush() we incorrectly only mask the hardware page size from the address, instead of the logical page size. This causes things to go wrong when we later attempt to iterate through the hardware subpages of the logical page. This patch corrects the error. It has been tested on pSeries bare metal by Michael Neuling. Signed-off-by: David Gibson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/tlb_hash64.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 282d9306361f..1ec06576f619 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -63,15 +63,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, if (huge) { #ifdef CONFIG_HUGETLB_PAGE psize = get_slice_psize(mm, addr); + /* Mask the address for the correct page size */ + addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ #endif - } else + } else { psize = pte_pagesize_index(mm, addr, pte); + /* Mask the address for the standard page size. If we + * have a 64k page kernel, but the hardware does not + * support 64k pages, this might be different from the + * hardware page size encoded in the slice table. 
*/ + addr &= PAGE_MASK; + } - /* Mask the address for the correct page size */ - addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); /* Build full vaddr */ if (!is_kernel_addr(addr)) { From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 9 Feb 2010 18:01:44 -0800 Subject: [PATCH 391/640] x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8 CPUs We need to fall back from logical-flat APIC mode to physical-flat mode when we have more than 8 CPUs. However, in the presence of CPU hotplug(with bios listing not enabled but possible cpus as disabled cpus in MADT), we have to consider the number of possible CPUs rather than the number of current CPUs; otherwise we may cross the 8-CPU boundary when CPUs are added later. 32bit apic code can use more cleanups (like the removal of vendor checks in 32bit default_setup_apic_routing()) and more unifications with 64bit code. Yinghai has some patches in works already. This patch addresses the boot issue that is reported in the virtualization guest context. [ hpa: incorporated function annotation feedback from Yinghai Lu ] Signed-off-by: Suresh Siddha LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com> Acked-by: Shaohui Zheng Reviewed-by: Yinghai Lu Cc: Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/acpi/boot.c | 5 ----- arch/x86/kernel/apic/apic.c | 17 ----------------- arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/probe_64.c | 2 +- arch/x86/kernel/mpparse.c | 7 ------- arch/x86/kernel/smpboot.c | 2 -- 6 files changed, 28 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 036d28adf59d..0acbcdfa5ca4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1185,9 +1185,6 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif /* * Parse MADT IO-APIC entries */ @@ -1197,8 +1194,6 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; - if (apic->setup_apic_routing) - apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3987e4408f75..dfca210f6a10 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void) #endif enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif verify_local_APIC(); connect_bsp_APIC(); @@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f6768c..99d2fe016084 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ 
b/arch/x86/kernel/apic/probe_32.c @@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void default_setup_apic_routing(void) +void __init default_setup_apic_routing(void) +{ + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); +#endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); +} + +static void setup_apic_flat_routing(void) { #ifdef CONFIG_X86_IO_APIC printk(KERN_INFO @@ -103,7 +128,7 @@ struct apic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, + .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 450fe2064a14..83e9be4778e2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void) } #endif - if (apic == &apic_flat && num_processors > 8) + if (apic == &apic_flat && num_possible_cpus() > 8) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 40b54ceb68b5..a2c1edd2d3ac 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) x86_init.mpparse.mpc_record(1); } -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); - if (!num_processors) 
printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f3..b4e870cbdc60 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1083,9 +1083,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); From f79f11852831ba8837e82b73364e6f1cd0145499 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Feb 2010 16:14:04 +0100 Subject: [PATCH 392/640] compat_ioctl: ignore RAID_VERSION ioctl md ioctls are now handled by the md driver itself, but mdadm may call RAID_VERSION on other devices as well. Mark the command as IGNORE_IOCTL so this fails silently rather than printing an annoying message. Reported-by: "Michael S. Tsirkin" Cc: "Rafael J. Wysocki" Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c5c45de1a2ee..b6f23b25370e 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1038,6 +1038,8 @@ COMPATIBLE_IOCTL(FIOQSIZE) #ifdef CONFIG_BLOCK /* loop */ IGNORE_IOCTL(LOOP_CLR_FD) +/* md calls this on random blockdevs */ +IGNORE_IOCTL(RAID_VERSION) /* SG stuff */ COMPATIBLE_IOCTL(SG_SET_TIMEOUT) COMPATIBLE_IOCTL(SG_GET_TIMEOUT) From a6c7fdd29350a74ba5f76809436de9c3d6763009 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 4 Feb 2010 13:06:59 +0200 Subject: [PATCH 393/640] OMAP: hsmmc: fix memory leak The platform data allocated with kmalloc() will become unreachable once the init is complete, so it should be freed. The problem was discovered by kmemleak. 
Signed-off-by: Aaro Koskinen Acked-by: Adrian Hunter Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mmc-twl4030.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c index 0c3c72d934bf..8afe9dd3f150 100644 --- a/arch/arm/mach-omap2/mmc-twl4030.c +++ b/arch/arm/mach-omap2/mmc-twl4030.c @@ -408,6 +408,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers) { struct twl4030_hsmmc_info *c; int nr_hsmmc = ARRAY_SIZE(hsmmc_data); + int i; if (cpu_is_omap2430()) { control_pbias_offset = OMAP243X_CONTROL_PBIAS_LITE; @@ -434,7 +435,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers) mmc = kzalloc(sizeof(struct omap_mmc_platform_data), GFP_KERNEL); if (!mmc) { pr_err("Cannot allocate memory for mmc device!\n"); - return; + goto done; } if (c->name) @@ -532,6 +533,10 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers) continue; c->dev = mmc->dev; } + +done: + for (i = 0; i < nr_hsmmc; i++) + kfree(hsmmc_data[i]); } #endif From 174b24963eaf96dc5e093502ee09639aed13eb2f Mon Sep 17 00:00:00 2001 From: Jelle Martijn Kok Date: Wed, 10 Feb 2010 09:34:09 -0600 Subject: [PATCH 394/640] rtl8187: Add new device ID Add new RTL8187B device. Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. 
Linville --- drivers/net/wireless/rtl818x/rtl8187_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index bc5726dd5fe4..7ba3052b0708 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -65,6 +65,7 @@ static struct usb_device_id rtl8187_table[] __devinitdata = { /* Sitecom */ {USB_DEVICE(0x0df6, 0x000d), .driver_info = DEVICE_RTL8187}, {USB_DEVICE(0x0df6, 0x0028), .driver_info = DEVICE_RTL8187B}, + {USB_DEVICE(0x0df6, 0x0029), .driver_info = DEVICE_RTL8187B}, /* Sphairon Access Systems GmbH */ {USB_DEVICE(0x114B, 0x0150), .driver_info = DEVICE_RTL8187}, /* Dick Smith Electronics */ From d6cade0f7f40834ff3b48f2469d00b1be0ea0db6 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Tue, 9 Feb 2010 23:37:54 +0000 Subject: [PATCH 395/640] via-velocity: Remove unused IRQ status parameter from rx_srv and tx_srv Signed-off-by: Simon Kagstrom Signed-off-by: David S. Miller --- drivers/net/via-velocity.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index c93f58f5c6f2..133069738ba0 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1877,13 +1877,12 @@ static void velocity_error(struct velocity_info *vptr, int status) /** * tx_srv - transmit interrupt service * @vptr; Velocity - * @status: * * Scan the queues looking for transmitted packets that * we can complete and clean up. 
Update any statistics as * necessary/ */ -static int velocity_tx_srv(struct velocity_info *vptr, u32 status) +static int velocity_tx_srv(struct velocity_info *vptr) { struct tx_desc *td; int qnum; @@ -2090,14 +2089,12 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx) /** * velocity_rx_srv - service RX interrupt * @vptr: velocity - * @status: adapter status (unused) * * Walk the receive ring of the velocity adapter and remove * any received packets from the receive queue. Hand the ring * slots back to the adapter for reuse. */ -static int velocity_rx_srv(struct velocity_info *vptr, int status, - int budget_left) +static int velocity_rx_srv(struct velocity_info *vptr, int budget_left) { struct net_device_stats *stats = &vptr->dev->stats; int rd_curr = vptr->rx.curr; @@ -2165,10 +2162,10 @@ static int velocity_poll(struct napi_struct *napi, int budget) * Do rx and tx twice for performance (taken from the VIA * out-of-tree driver). */ - rx_done = velocity_rx_srv(vptr, isr_status, budget / 2); - velocity_tx_srv(vptr, isr_status); - rx_done += velocity_rx_srv(vptr, isr_status, budget - rx_done); - velocity_tx_srv(vptr, isr_status); + rx_done = velocity_rx_srv(vptr, budget / 2); + velocity_tx_srv(vptr); + rx_done += velocity_rx_srv(vptr, budget - rx_done); + velocity_tx_srv(vptr); spin_unlock(&vptr->lock); @@ -3100,7 +3097,7 @@ static int velocity_resume(struct pci_dev *pdev) velocity_init_registers(vptr, VELOCITY_INIT_WOL); mac_disable_int(vptr->mac_regs); - velocity_tx_srv(vptr, 0); + velocity_tx_srv(vptr); for (i = 0; i < vptr->tx.numq; i++) { if (vptr->tx.used[i]) From 39c2ff43ea3830ccc693f965abdace96e514b1c5 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Tue, 9 Feb 2010 23:38:07 +0000 Subject: [PATCH 396/640] via-velocity: Take spinlock on set coalesce velocity_set_coalesce touches ISR and some other sensitive registers not covered by the rtnl lock, so take the velocity spinlock. Signed-off-by: Simon Kagstrom Signed-off-by: David S. 
Miller --- drivers/net/via-velocity.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 133069738ba0..54bafdab1f9d 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -3341,6 +3341,7 @@ static int velocity_set_coalesce(struct net_device *dev, { struct velocity_info *vptr = netdev_priv(dev); int max_us = 0x3f * 64; + unsigned long flags; /* 6 bits of */ if (ecmd->tx_coalesce_usecs > max_us) @@ -3362,6 +3363,7 @@ static int velocity_set_coalesce(struct net_device *dev, ecmd->tx_coalesce_usecs); /* Setup the interrupt suppression and queue timers */ + spin_lock_irqsave(&vptr->lock, flags); mac_disable_int(vptr->mac_regs); setup_adaptive_interrupts(vptr); setup_queue_timers(vptr); @@ -3369,6 +3371,7 @@ static int velocity_set_coalesce(struct net_device *dev, mac_write_int_mask(vptr->int_mask, vptr->mac_regs); mac_clear_isr(vptr->mac_regs); mac_enable_int(vptr->mac_regs); + spin_unlock_irqrestore(&vptr->lock, flags); return 0; } From 3f2e8d9f13246382fbda6f03178eef867a9bfbe2 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Tue, 9 Feb 2010 23:38:25 +0000 Subject: [PATCH 397/640] via-velocity: Fix races on shared interrupts This patch fixes two potential races in the velocity driver: * Move the ACK and error handler to the interrupt handler. This fixes a potential race with shared interrupts when the other device interrupts before the NAPI poll handler has finished. As the velocity driver hasn't acked its own interrupt, it will then steal the interrupt from the other device. * Use spin_lock_irqsave in velocity_poll. In the current code, the interrupt handler will deadlock if e.g., the NAPI poll handler is executing when an interrupt (for another device) comes in since it tries to take the already held lock. Also unlock the spinlock only after enabling the interrupt in velocity_poll. The error path is moved to the interrupt handler since this is where the ISR is checked now. 
Signed-off-by: Simon Kagstrom Signed-off-by: Anders Grafstrom Signed-off-by: David S. Miller --- drivers/net/via-velocity.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 54bafdab1f9d..317aa34b21cf 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2148,16 +2148,9 @@ static int velocity_poll(struct napi_struct *napi, int budget) struct velocity_info *vptr = container_of(napi, struct velocity_info, napi); unsigned int rx_done; - u32 isr_status; - - spin_lock(&vptr->lock); - isr_status = mac_read_isr(vptr->mac_regs); - - /* Ack the interrupt */ - mac_write_isr(vptr->mac_regs, isr_status); - if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI))) - velocity_error(vptr, isr_status); + unsigned long flags; + spin_lock_irqsave(&vptr->lock, flags); /* * Do rx and tx twice for performance (taken from the VIA * out-of-tree driver). @@ -2167,13 +2160,12 @@ static int velocity_poll(struct napi_struct *napi, int budget) rx_done += velocity_rx_srv(vptr, budget - rx_done); velocity_tx_srv(vptr); - spin_unlock(&vptr->lock); - /* If budget not fully consumed, exit the polling mode */ if (rx_done < budget) { napi_complete(napi); mac_enable_int(vptr->mac_regs); } + spin_unlock_irqrestore(&vptr->lock, flags); return rx_done; } @@ -2203,10 +2195,17 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance) return IRQ_NONE; } + /* Ack the interrupt */ + mac_write_isr(vptr->mac_regs, isr_status); + if (likely(napi_schedule_prep(&vptr->napi))) { mac_disable_int(vptr->mac_regs); __napi_schedule(&vptr->napi); } + + if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI))) + velocity_error(vptr, isr_status); + spin_unlock(&vptr->lock); return IRQ_HANDLED; From 8f98781e0f15207b6ab33bee1fae05428be0475b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 10 Feb 2010 17:32:38 +0100 Subject: [PATCH 398/640] async-tx: fix buffer 
submission error handling in ipu_idma.c If submitting new buffer failed, a wrong descriptor gets completed and it doesn't check, if a callback is at all defined, which can lead to an Oops. Fix these bugs and make ipu_update_channel_buffer() void, because it never fails. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- drivers/dma/ipu/ipu_idmac.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 9a5bc1a7389e..e80bae1673fa 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -761,12 +761,10 @@ static void ipu_select_buffer(enum ipu_channel channel, int buffer_n) * @buffer_n: buffer number to update. * 0 or 1 are the only valid values. * @phyaddr: buffer physical address. - * @return: Returns 0 on success or negative error code on failure. This - * function will fail if the buffer is set to ready. */ /* Called under spin_lock(_irqsave)(&ichan->lock) */ -static int ipu_update_channel_buffer(struct idmac_channel *ichan, - int buffer_n, dma_addr_t phyaddr) +static void ipu_update_channel_buffer(struct idmac_channel *ichan, + int buffer_n, dma_addr_t phyaddr) { enum ipu_channel channel = ichan->dma_chan.chan_id; uint32_t reg; @@ -806,8 +804,6 @@ static int ipu_update_channel_buffer(struct idmac_channel *ichan, } spin_unlock_irqrestore(&ipu_data.lock, flags); - - return 0; } /* Called under spin_lock_irqsave(&ichan->lock) */ @@ -816,7 +812,6 @@ static int ipu_submit_buffer(struct idmac_channel *ichan, { unsigned int chan_id = ichan->dma_chan.chan_id; struct device *dev = &ichan->dma_chan.dev->device; - int ret; if (async_tx_test_ack(&desc->txd)) return -EINTR; @@ -827,14 +822,7 @@ static int ipu_submit_buffer(struct idmac_channel *ichan, * could make it conditional on status >= IPU_CHANNEL_ENABLED, but * doing it again shouldn't hurt either. 
*/ - ret = ipu_update_channel_buffer(ichan, buf_idx, - sg_dma_address(sg)); - - if (ret < 0) { - dev_err(dev, "Updating sg %p on channel 0x%x buffer %d failed!\n", - sg, chan_id, buf_idx); - return ret; - } + ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg)); ipu_select_buffer(chan_id, buf_idx); dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n", @@ -1379,10 +1367,11 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id) if (likely(sgnew) && ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) { - callback = desc->txd.callback; - callback_param = desc->txd.callback_param; + callback = descnew->txd.callback; + callback_param = descnew->txd.callback_param; spin_unlock(&ichan->lock); - callback(callback_param); + if (callback) + callback(callback_param); spin_lock(&ichan->lock); } From 734c2992828c66cee3feb21ecd30a6ac44aecc51 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 6 Feb 2010 09:43:41 +0100 Subject: [PATCH 399/640] drivers/dma: Correct NULL test cohd_fin has already been verified not to be NULL, so the argument to BUG_ON cannot be true. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ expression *x; expression e; identifier l; @@ if (x == NULL || ...) { ... when forall return ...; } ... 
when != goto l; when != x = e when != &x *x == NULL // Signed-off-by: Julia Lawall Acked-by: Linus Walleij Signed-off-by: Dan Williams --- drivers/dma/coh901318.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index b5f2ee0f8e2c..64a937262a40 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -613,8 +613,6 @@ static void dma_tasklet(unsigned long data) cohd_fin->pending_irqs--; cohc->completed = cohd_fin->desc.cookie; - BUG_ON(cohc->nbr_active_done && cohd_fin == NULL); - if (cohc->nbr_active_done == 0) return; From 8523c0480979080e8088e40f25459e5b2d19f621 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 8 Feb 2010 16:41:15 -0800 Subject: [PATCH 400/640] RDMA/cm: Revert association of an RDMA device when binding to loopback Revert the following change from commit 6f8372b6 ("RDMA/cm: fix loopback address support") The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdma_bind_addr, no device is associated with the rdma_cm_id. Fix this. It turns out that important apps such as Open MPI depend on rdma_bind_addr() NOT associating any RDMA device when binding to a loopback address. Open MPI is being updated to deal with this, but at least until a new Open MPI release is available, maintain the previous behavior: allow rdma_bind_addr() to succeed, but do not bind to a device. 
Signed-off-by: Sean Hefty Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index cc9b5940fa97..875e34e0b235 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2115,9 +2115,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (ret) goto err1; - if (cma_loopback_addr(addr)) { - ret = cma_bind_loopback(id_priv); - } else if (!cma_zero_addr(addr)) { + if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); if (ret) goto err1; From b91ad0ec52770dcb622b94fc1f57e076686f427a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 5 Feb 2010 09:14:17 +0800 Subject: [PATCH 401/640] drm/i915: Rework DPLL calculation parameters for Ironlake Got Ironlake DPLL parameter table, which reflects the hardware optimized values. So this one tries to list DPLL parameters for different output types, which should potentially fix the clock issue seen on new Arrandale CPUs. This fixes DPLL setting failure on one 1920x1080 dual channel LVDS for Ironlake. Tests have also been made on LVDS panels with smaller size and CRT/HDMI/DP ports for different monitors on all of their supported modes. Update: - Change name of double LVDS to dual LVDS. - Fix SSC 120M reference clock to use the right range.
Cc: CSJ Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 217 +++++++++++++++++++++------ 1 file changed, 167 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 12775df1bbfd..7e9c835f9ae0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -240,33 +240,86 @@ struct intel_limit { #define IRONLAKE_DOT_MAX 350000 #define IRONLAKE_VCO_MIN 1760000 #define IRONLAKE_VCO_MAX 3510000 -#define IRONLAKE_N_MIN 1 -#define IRONLAKE_N_MAX 6 -#define IRONLAKE_M_MIN 79 -#define IRONLAKE_M_MAX 127 #define IRONLAKE_M1_MIN 12 #define IRONLAKE_M1_MAX 22 #define IRONLAKE_M2_MIN 5 #define IRONLAKE_M2_MAX 9 -#define IRONLAKE_P_SDVO_DAC_MIN 5 -#define IRONLAKE_P_SDVO_DAC_MAX 80 -#define IRONLAKE_P_LVDS_MIN 28 -#define IRONLAKE_P_LVDS_MAX 112 -#define IRONLAKE_P1_MIN 1 -#define IRONLAKE_P1_MAX 8 -#define IRONLAKE_P2_SDVO_DAC_SLOW 10 -#define IRONLAKE_P2_SDVO_DAC_FAST 5 -#define IRONLAKE_P2_LVDS_SLOW 14 /* single channel */ -#define IRONLAKE_P2_LVDS_FAST 7 /* double channel */ #define IRONLAKE_P2_DOT_LIMIT 225000 /* 225Mhz */ -#define IRONLAKE_P_DISPLAY_PORT_MIN 10 -#define IRONLAKE_P_DISPLAY_PORT_MAX 20 -#define IRONLAKE_P2_DISPLAY_PORT_FAST 10 -#define IRONLAKE_P2_DISPLAY_PORT_SLOW 10 -#define IRONLAKE_P2_DISPLAY_PORT_LIMIT 0 -#define IRONLAKE_P1_DISPLAY_PORT_MIN 1 -#define IRONLAKE_P1_DISPLAY_PORT_MAX 2 +/* We have parameter ranges for different type of outputs. 
*/ + +/* DAC & HDMI Refclk 120Mhz */ +#define IRONLAKE_DAC_N_MIN 1 +#define IRONLAKE_DAC_N_MAX 5 +#define IRONLAKE_DAC_M_MIN 79 +#define IRONLAKE_DAC_M_MAX 127 +#define IRONLAKE_DAC_P_MIN 5 +#define IRONLAKE_DAC_P_MAX 80 +#define IRONLAKE_DAC_P1_MIN 1 +#define IRONLAKE_DAC_P1_MAX 8 +#define IRONLAKE_DAC_P2_SLOW 10 +#define IRONLAKE_DAC_P2_FAST 5 + +/* LVDS single-channel 120Mhz refclk */ +#define IRONLAKE_LVDS_S_N_MIN 1 +#define IRONLAKE_LVDS_S_N_MAX 3 +#define IRONLAKE_LVDS_S_M_MIN 79 +#define IRONLAKE_LVDS_S_M_MAX 118 +#define IRONLAKE_LVDS_S_P_MIN 28 +#define IRONLAKE_LVDS_S_P_MAX 112 +#define IRONLAKE_LVDS_S_P1_MIN 2 +#define IRONLAKE_LVDS_S_P1_MAX 8 +#define IRONLAKE_LVDS_S_P2_SLOW 14 +#define IRONLAKE_LVDS_S_P2_FAST 14 + +/* LVDS dual-channel 120Mhz refclk */ +#define IRONLAKE_LVDS_D_N_MIN 1 +#define IRONLAKE_LVDS_D_N_MAX 3 +#define IRONLAKE_LVDS_D_M_MIN 79 +#define IRONLAKE_LVDS_D_M_MAX 127 +#define IRONLAKE_LVDS_D_P_MIN 14 +#define IRONLAKE_LVDS_D_P_MAX 56 +#define IRONLAKE_LVDS_D_P1_MIN 2 +#define IRONLAKE_LVDS_D_P1_MAX 8 +#define IRONLAKE_LVDS_D_P2_SLOW 7 +#define IRONLAKE_LVDS_D_P2_FAST 7 + +/* LVDS single-channel 100Mhz refclk */ +#define IRONLAKE_LVDS_S_SSC_N_MIN 1 +#define IRONLAKE_LVDS_S_SSC_N_MAX 2 +#define IRONLAKE_LVDS_S_SSC_M_MIN 79 +#define IRONLAKE_LVDS_S_SSC_M_MAX 126 +#define IRONLAKE_LVDS_S_SSC_P_MIN 28 +#define IRONLAKE_LVDS_S_SSC_P_MAX 112 +#define IRONLAKE_LVDS_S_SSC_P1_MIN 2 +#define IRONLAKE_LVDS_S_SSC_P1_MAX 8 +#define IRONLAKE_LVDS_S_SSC_P2_SLOW 14 +#define IRONLAKE_LVDS_S_SSC_P2_FAST 14 + +/* LVDS dual-channel 100Mhz refclk */ +#define IRONLAKE_LVDS_D_SSC_N_MIN 1 +#define IRONLAKE_LVDS_D_SSC_N_MAX 3 +#define IRONLAKE_LVDS_D_SSC_M_MIN 79 +#define IRONLAKE_LVDS_D_SSC_M_MAX 126 +#define IRONLAKE_LVDS_D_SSC_P_MIN 14 +#define IRONLAKE_LVDS_D_SSC_P_MAX 42 +#define IRONLAKE_LVDS_D_SSC_P1_MIN 2 +#define IRONLAKE_LVDS_D_SSC_P1_MAX 6 +#define IRONLAKE_LVDS_D_SSC_P2_SLOW 7 +#define IRONLAKE_LVDS_D_SSC_P2_FAST 7 + +/* DisplayPort */ +#define 
IRONLAKE_DP_N_MIN 1 +#define IRONLAKE_DP_N_MAX 2 +#define IRONLAKE_DP_M_MIN 81 +#define IRONLAKE_DP_M_MAX 90 +#define IRONLAKE_DP_P_MIN 10 +#define IRONLAKE_DP_P_MAX 20 +#define IRONLAKE_DP_P2_FAST 10 +#define IRONLAKE_DP_P2_SLOW 10 +#define IRONLAKE_DP_P2_LIMIT 0 +#define IRONLAKE_DP_P1_MIN 1 +#define IRONLAKE_DP_P1_MAX 2 static bool intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, @@ -474,33 +527,78 @@ static const intel_limit_t intel_limits_pineview_lvds = { .find_pll = intel_find_best_PLL, }; -static const intel_limit_t intel_limits_ironlake_sdvo = { +static const intel_limit_t intel_limits_ironlake_dac = { .dot = { .min = IRONLAKE_DOT_MIN, .max = IRONLAKE_DOT_MAX }, .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX }, - .n = { .min = IRONLAKE_N_MIN, .max = IRONLAKE_N_MAX }, - .m = { .min = IRONLAKE_M_MIN, .max = IRONLAKE_M_MAX }, + .n = { .min = IRONLAKE_DAC_N_MIN, .max = IRONLAKE_DAC_N_MAX }, + .m = { .min = IRONLAKE_DAC_M_MIN, .max = IRONLAKE_DAC_M_MAX }, .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX }, - .p = { .min = IRONLAKE_P_SDVO_DAC_MIN, .max = IRONLAKE_P_SDVO_DAC_MAX }, - .p1 = { .min = IRONLAKE_P1_MIN, .max = IRONLAKE_P1_MAX }, + .p = { .min = IRONLAKE_DAC_P_MIN, .max = IRONLAKE_DAC_P_MAX }, + .p1 = { .min = IRONLAKE_DAC_P1_MIN, .max = IRONLAKE_DAC_P1_MAX }, .p2 = { .dot_limit = IRONLAKE_P2_DOT_LIMIT, - .p2_slow = IRONLAKE_P2_SDVO_DAC_SLOW, - .p2_fast = IRONLAKE_P2_SDVO_DAC_FAST }, + .p2_slow = IRONLAKE_DAC_P2_SLOW, + .p2_fast = IRONLAKE_DAC_P2_FAST }, .find_pll = intel_g4x_find_best_PLL, }; -static const intel_limit_t intel_limits_ironlake_lvds = { +static const intel_limit_t intel_limits_ironlake_single_lvds = { .dot = { .min = IRONLAKE_DOT_MIN, .max = IRONLAKE_DOT_MAX }, .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX }, - .n = { .min = IRONLAKE_N_MIN, .max = IRONLAKE_N_MAX }, - .m = { .min = IRONLAKE_M_MIN, .max = IRONLAKE_M_MAX }, + .n = { 
.min = IRONLAKE_LVDS_S_N_MIN, .max = IRONLAKE_LVDS_S_N_MAX }, + .m = { .min = IRONLAKE_LVDS_S_M_MIN, .max = IRONLAKE_LVDS_S_M_MAX }, .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX }, - .p = { .min = IRONLAKE_P_LVDS_MIN, .max = IRONLAKE_P_LVDS_MAX }, - .p1 = { .min = IRONLAKE_P1_MIN, .max = IRONLAKE_P1_MAX }, + .p = { .min = IRONLAKE_LVDS_S_P_MIN, .max = IRONLAKE_LVDS_S_P_MAX }, + .p1 = { .min = IRONLAKE_LVDS_S_P1_MIN, .max = IRONLAKE_LVDS_S_P1_MAX }, .p2 = { .dot_limit = IRONLAKE_P2_DOT_LIMIT, - .p2_slow = IRONLAKE_P2_LVDS_SLOW, - .p2_fast = IRONLAKE_P2_LVDS_FAST }, + .p2_slow = IRONLAKE_LVDS_S_P2_SLOW, + .p2_fast = IRONLAKE_LVDS_S_P2_FAST }, + .find_pll = intel_g4x_find_best_PLL, +}; + +static const intel_limit_t intel_limits_ironlake_dual_lvds = { + .dot = { .min = IRONLAKE_DOT_MIN, .max = IRONLAKE_DOT_MAX }, + .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX }, + .n = { .min = IRONLAKE_LVDS_D_N_MIN, .max = IRONLAKE_LVDS_D_N_MAX }, + .m = { .min = IRONLAKE_LVDS_D_M_MIN, .max = IRONLAKE_LVDS_D_M_MAX }, + .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, + .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX }, + .p = { .min = IRONLAKE_LVDS_D_P_MIN, .max = IRONLAKE_LVDS_D_P_MAX }, + .p1 = { .min = IRONLAKE_LVDS_D_P1_MIN, .max = IRONLAKE_LVDS_D_P1_MAX }, + .p2 = { .dot_limit = IRONLAKE_P2_DOT_LIMIT, + .p2_slow = IRONLAKE_LVDS_D_P2_SLOW, + .p2_fast = IRONLAKE_LVDS_D_P2_FAST }, + .find_pll = intel_g4x_find_best_PLL, +}; + +static const intel_limit_t intel_limits_ironlake_single_lvds_100m = { + .dot = { .min = IRONLAKE_DOT_MIN, .max = IRONLAKE_DOT_MAX }, + .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX }, + .n = { .min = IRONLAKE_LVDS_S_SSC_N_MIN, .max = IRONLAKE_LVDS_S_SSC_N_MAX }, + .m = { .min = IRONLAKE_LVDS_S_SSC_M_MIN, .max = IRONLAKE_LVDS_S_SSC_M_MAX }, + .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, + .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX 
}, + .p = { .min = IRONLAKE_LVDS_S_SSC_P_MIN, .max = IRONLAKE_LVDS_S_SSC_P_MAX }, + .p1 = { .min = IRONLAKE_LVDS_S_SSC_P1_MIN,.max = IRONLAKE_LVDS_S_SSC_P1_MAX }, + .p2 = { .dot_limit = IRONLAKE_P2_DOT_LIMIT, + .p2_slow = IRONLAKE_LVDS_S_SSC_P2_SLOW, + .p2_fast = IRONLAKE_LVDS_S_SSC_P2_FAST }, + .find_pll = intel_g4x_find_best_PLL, +}; + +static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = { + .dot = { .min = IRONLAKE_DOT_MIN, .max = IRONLAKE_DOT_MAX }, + .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX }, + .n = { .min = IRONLAKE_LVDS_D_SSC_N_MIN, .max = IRONLAKE_LVDS_D_SSC_N_MAX }, + .m = { .min = IRONLAKE_LVDS_D_SSC_M_MIN, .max = IRONLAKE_LVDS_D_SSC_M_MAX }, + .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, + .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX }, + .p = { .min = IRONLAKE_LVDS_D_SSC_P_MIN, .max = IRONLAKE_LVDS_D_SSC_P_MAX }, + .p1 = { .min = IRONLAKE_LVDS_D_SSC_P1_MIN,.max = IRONLAKE_LVDS_D_SSC_P1_MAX }, + .p2 = { .dot_limit = IRONLAKE_P2_DOT_LIMIT, + .p2_slow = IRONLAKE_LVDS_D_SSC_P2_SLOW, + .p2_fast = IRONLAKE_LVDS_D_SSC_P2_FAST }, .find_pll = intel_g4x_find_best_PLL, }; @@ -509,34 +607,53 @@ static const intel_limit_t intel_limits_ironlake_display_port = { .max = IRONLAKE_DOT_MAX }, .vco = { .min = IRONLAKE_VCO_MIN, .max = IRONLAKE_VCO_MAX}, - .n = { .min = IRONLAKE_N_MIN, - .max = IRONLAKE_N_MAX }, - .m = { .min = IRONLAKE_M_MIN, - .max = IRONLAKE_M_MAX }, + .n = { .min = IRONLAKE_DP_N_MIN, + .max = IRONLAKE_DP_N_MAX }, + .m = { .min = IRONLAKE_DP_M_MIN, + .max = IRONLAKE_DP_M_MAX }, .m1 = { .min = IRONLAKE_M1_MIN, .max = IRONLAKE_M1_MAX }, .m2 = { .min = IRONLAKE_M2_MIN, .max = IRONLAKE_M2_MAX }, - .p = { .min = IRONLAKE_P_DISPLAY_PORT_MIN, - .max = IRONLAKE_P_DISPLAY_PORT_MAX }, - .p1 = { .min = IRONLAKE_P1_DISPLAY_PORT_MIN, - .max = IRONLAKE_P1_DISPLAY_PORT_MAX}, - .p2 = { .dot_limit = IRONLAKE_P2_DISPLAY_PORT_LIMIT, - .p2_slow = IRONLAKE_P2_DISPLAY_PORT_SLOW, - .p2_fast = IRONLAKE_P2_DISPLAY_PORT_FAST 
}, + .p = { .min = IRONLAKE_DP_P_MIN, + .max = IRONLAKE_DP_P_MAX }, + .p1 = { .min = IRONLAKE_DP_P1_MIN, + .max = IRONLAKE_DP_P1_MAX}, + .p2 = { .dot_limit = IRONLAKE_DP_P2_LIMIT, + .p2_slow = IRONLAKE_DP_P2_SLOW, + .p2_fast = IRONLAKE_DP_P2_FAST }, .find_pll = intel_find_pll_ironlake_dp, }; static const intel_limit_t *intel_ironlake_limit(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; const intel_limit_t *limit; - if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) - limit = &intel_limits_ironlake_lvds; - else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) || + int refclk = 120; + + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + if (dev_priv->lvds_use_ssc && dev_priv->lvds_ssc_freq == 100) + refclk = 100; + + if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == + LVDS_CLKB_POWER_UP) { + /* LVDS dual channel */ + if (refclk == 100) + limit = &intel_limits_ironlake_dual_lvds_100m; + else + limit = &intel_limits_ironlake_dual_lvds; + } else { + if (refclk == 100) + limit = &intel_limits_ironlake_single_lvds_100m; + else + limit = &intel_limits_ironlake_single_lvds; + } + } else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) || HAS_eDP) limit = &intel_limits_ironlake_display_port; else - limit = &intel_limits_ironlake_sdvo; + limit = &intel_limits_ironlake_dac; return limit; } From f4fc580bec5fb76560329c8c537b9b71d8d032b6 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Mon, 1 Feb 2010 17:10:55 +0800 Subject: [PATCH 402/640] MIPS: Fixup of the r4k timer As reported by Maxime Bizon, the commit "MIPS: PowerTV: Fix support for timer interrupts with > 64 external IRQs" have broken the r4k timer since it didn't initialize the cp0_compare_irq_shift variable used in c0_compare_int_pending() on the architectures whose cpu_has_mips_r2 is false. This patch fixes it via initializing the cp0_compare_irq_shift as the cp0_compare_irq used in the old c0_compare_int_pending(). 
Reported-by: Maxime Bizon Signed-off-by: Wu Zhangjin Cc: David VomLehn Cc: mbizon@freebox.fr Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/922/ Tested-by: Shane McDonald Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 338dfe8ed002..31b204b26ba0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1501,6 +1501,7 @@ void __cpuinit per_cpu_trap_init(void) cp0_perfcount_irq = -1; } else { cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ; + cp0_compare_irq_shift = cp0_compare_irq; cp0_perfcount_irq = -1; } From 59d302b342e5d451c4448479e82e1105864a3112 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 3 Feb 2010 19:16:34 +0100 Subject: [PATCH 403/640] MIPS: IP27: Make defconfig useful again. RTC support was rewritten but the defconfig files were not updated. Enable IPv6 support which for some folks already is a must have. Assign useful values to other new options. 
Signed-off-by: Ralf Baechle --- arch/mips/configs/ip27_defconfig | 917 ++++++++++++++++++++++++------- 1 file changed, 729 insertions(+), 188 deletions(-) diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index ed84b4cb3c8d..84b6503f10b9 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.23-rc2 -# Tue Aug 7 13:04:24 2007 +# Linux kernel version: 2.6.33-rc6 +# Wed Feb 3 18:12:31 2010 # CONFIG_MIPS=y @@ -9,20 +9,28 @@ CONFIG_MIPS=y # Machine selection # # CONFIG_MACH_ALCHEMY is not set +# CONFIG_AR7 is not set +# CONFIG_BCM47XX is not set +# CONFIG_BCM63XX is not set # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MACH_JAZZ is not set -# CONFIG_LEMOTE_FULONG is not set +# CONFIG_LASAT is not set +# CONFIG_MACH_LOONGSON is not set # CONFIG_MIPS_MALTA is not set # CONFIG_MIPS_SIM is not set -# CONFIG_MARKEINS is not set +# CONFIG_NEC_MARKEINS is not set # CONFIG_MACH_VR41XX is not set +# CONFIG_NXP_STB220 is not set +# CONFIG_NXP_STB225 is not set # CONFIG_PNX8550_JBS is not set # CONFIG_PNX8550_STB810 is not set # CONFIG_PMC_MSP is not set # CONFIG_PMC_YOSEMITE is not set +# CONFIG_POWERTV is not set # CONFIG_SGI_IP22 is not set CONFIG_SGI_IP27=y +# CONFIG_SGI_IP28 is not set # CONFIG_SGI_IP32 is not set # CONFIG_SIBYTE_CRHINE is not set # CONFIG_SIBYTE_CARMEL is not set @@ -33,32 +41,39 @@ CONFIG_SGI_IP27=y # CONFIG_SIBYTE_SENTOSA is not set # CONFIG_SIBYTE_BIGSUR is not set # CONFIG_SNI_RM is not set -# CONFIG_TOSHIBA_JMR3927 is not set -# CONFIG_TOSHIBA_RBTX4927 is not set -# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_MACH_TX39XX is not set +# CONFIG_MACH_TX49XX is not set +# CONFIG_MIKROTIK_RB532 is not set # CONFIG_WR_PPMC is not set +# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set +# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set +# CONFIG_ALCHEMY_GPIO_INDIRECT is not set 
CONFIG_SGI_SN_M_MODE=y # CONFIG_SGI_SN_N_MODE is not set # CONFIG_MAPPED_KERNEL is not set # CONFIG_REPLICATE_KTEXT is not set # CONFIG_REPLICATE_EXHANDLERS is not set +CONFIG_LOONGSON_UART_BASE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_SUPPORTS_OPROFILE=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_TIME=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_ARC=y CONFIG_DMA_COHERENT=y -CONFIG_EARLY_PRINTK=y CONFIG_SYS_HAS_EARLY_PRINTK=y # CONFIG_NO_IOPORT is not set CONFIG_CPU_BIG_ENDIAN=y # CONFIG_CPU_LITTLE_ENDIAN is not set CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y +CONFIG_DEFAULT_SGI_PARTITION=y CONFIG_MIPS_L1_CACHE_SHIFT=7 CONFIG_ARC64=y CONFIG_BOOT_ELF64=y @@ -66,7 +81,8 @@ CONFIG_BOOT_ELF64=y # # CPU selection # -# CONFIG_CPU_LOONGSON2 is not set +# CONFIG_CPU_LOONGSON2E is not set +# CONFIG_CPU_LOONGSON2F is not set # CONFIG_CPU_MIPS32_R1 is not set # CONFIG_CPU_MIPS32_R2 is not set # CONFIG_CPU_MIPS64_R1 is not set @@ -79,6 +95,7 @@ CONFIG_BOOT_ELF64=y # CONFIG_CPU_TX49XX is not set # CONFIG_CPU_R5000 is not set # CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R5500 is not set # CONFIG_CPU_R6000 is not set # CONFIG_CPU_NEVADA is not set # CONFIG_CPU_R8000 is not set @@ -86,6 +103,7 @@ CONFIG_CPU_R10000=y # CONFIG_CPU_RM7000 is not set # CONFIG_CPU_RM9000 is not set # CONFIG_CPU_SB1 is not set +# CONFIG_CPU_CAVIUM_OCTEON is not set CONFIG_SYS_HAS_CPU_R10000=y CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y @@ -99,6 +117,7 @@ CONFIG_64BIT=y CONFIG_PAGE_SIZE_4KB=y # CONFIG_PAGE_SIZE_8KB is not set # CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_32KB is not set # CONFIG_PAGE_SIZE_64KB is not set CONFIG_CPU_HAS_PREFETCH=y CONFIG_MIPS_MT_DISABLED=y @@ -110,6 +129,7 @@ 
CONFIG_GENERIC_IRQ_PROBE=y CONFIG_IRQ_PER_CPU=y CONFIG_CPU_SUPPORTS_HIGHMEM=y CONFIG_ARCH_DISCONTIGMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_NUMA=y CONFIG_SYS_SUPPORTS_NUMA=y CONFIG_NODES_SHIFT=6 @@ -120,16 +140,22 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y -# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y -CONFIG_RESOURCES_64BIT=y +CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 CONFIG_SMP=y CONFIG_SYS_SUPPORTS_SMP=y CONFIG_NR_CPUS_DEFAULT_64=y CONFIG_NR_CPUS=64 +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y # CONFIG_HZ_48 is not set # CONFIG_HZ_100 is not set # CONFIG_HZ_128 is not set @@ -142,13 +168,13 @@ CONFIG_HZ=1000 CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y # CONFIG_MIPS_INSANE_LARGE is not set # CONFIG_KEXEC is not set CONFIG_SECCOMP=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # # General setup @@ -162,20 +188,41 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_TINY_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=64 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 +# CONFIG_GROUP_SCHED is not set CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +# CONFIG_CGROUP_NS is not set +# CONFIG_CGROUP_FREEZER is not set +# CONFIG_CGROUP_DEVICE is not set 
CONFIG_CPUSETS=y -CONFIG_SYSFS_DEPRECATED=y +CONFIG_PROC_PID_CPUSET=y +# CONFIG_CGROUP_CPUACCT is not set +# CONFIG_RESOURCE_COUNTERS is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set CONFIG_RELAY=y +# CONFIG_NAMESPACES is not set # CONFIG_BLK_DEV_INITRD is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y CONFIG_EMBEDDED=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y @@ -184,44 +231,92 @@ CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_PCSPKR_PLATFORM is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y + +# +# Kernel Performance Events And Counters +# CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_COMPAT_BRK=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_HAVE_SYSCALL_WRAPPERS=y +CONFIG_USE_GENERIC_SMP_HELPERS=y + +# +# GCOV-based kernel profiling +# +CONFIG_SLOW_WORK=y +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set # CONFIG_MODVERSIONS is not set CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set +# CONFIG_BLK_CGROUP is not set +CONFIG_BLOCK_COMPAT=y # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -CONFIG_DEFAULT_AS=y +# CONFIG_CFQ_GROUP_IOSCHED is not set # CONFIG_DEFAULT_DEADLINE is not set -# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# 
CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +CONFIG_INLINE_SPIN_UNLOCK=y +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +CONFIG_INLINE_READ_UNLOCK=y +# CONFIG_INLINE_READ_UNLOCK_BH is not set +CONFIG_INLINE_READ_UNLOCK_IRQ=y +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +CONFIG_INLINE_WRITE_UNLOCK=y +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +# CONFIG_FREEZER is not set # # Bus options (PCI, PCMCIA, EISA, ISA, TC) @@ -230,11 +325,10 @@ CONFIG_HW_HAS_PCI=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y # CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCI_LEGACY is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set CONFIG_MMU=y - -# -# PCCARD (PCMCIA/CardBus) support -# # CONFIG_PCCARD is not set # CONFIG_HOTPLUG_PCI is not set @@ -242,8 +336,9 @@ CONFIG_MMU=y # Executable file formats # CONFIG_BINFMT_ELF=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +# CONFIG_HAVE_AOUT is not set # CONFIG_BINFMT_MISC is not set -# CONFIG_BUILD_ELF64 is not set CONFIG_MIPS32_COMPAT=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y @@ -255,13 +350,10 @@ CONFIG_BINFMT_ELF32=y # Power management options # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set # CONFIG_PM_DEBUG is not set - -# -# Networking -# +# CONFIG_PM_RUNTIME is not 
set CONFIG_NET=y +CONFIG_COMPAT_NETLINK_MESSAGES=y # # Networking options @@ -273,6 +365,8 @@ CONFIG_XFRM=y CONFIG_XFRM_USER=m # CONFIG_XFRM_SUB_POLICY is not set CONFIG_XFRM_MIGRATE=y +CONFIG_XFRM_STATISTICS=y +CONFIG_XFRM_IPCOMP=m CONFIG_NET_KEY=y CONFIG_NET_KEY_MIGRATE=y CONFIG_INET=y @@ -292,19 +386,40 @@ CONFIG_IP_PNP=y # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set +CONFIG_INET_TUNNEL=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_LRO=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_SIT_6RD=y +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_PIMSM_V2=y CONFIG_NETWORK_SECMARK=y # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set @@ -314,9 +429,11 @@ CONFIG_IP_SCTP=m # CONFIG_SCTP_HMAC_NONE is not set # CONFIG_SCTP_HMAC_SHA1 is not set CONFIG_SCTP_HMAC_MD5=y +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set # CONFIG_LLC2 is not set @@ -326,12 +443,9 @@ CONFIG_SCTP_HMAC_MD5=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# 
-# QoS and/or fair queueing -# +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set CONFIG_NET_SCHED=y -CONFIG_NET_SCH_FIFO=y # # Queueing/Scheduling @@ -340,7 +454,7 @@ CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_HFSC=m CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RR=m +CONFIG_NET_SCH_MULTIQ=y CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m @@ -348,6 +462,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m +# CONFIG_NET_SCH_DRR is not set CONFIG_NET_SCH_INGRESS=m # @@ -364,41 +479,63 @@ CONFIG_NET_CLS_U32=m CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y # CONFIG_NET_EMATCH is not set CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m # CONFIG_NET_ACT_SIMP is not set -CONFIG_NET_CLS_POLICE=y +CONFIG_NET_ACT_SKBEDIT=m # CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set - -# -# Wireless -# -CONFIG_CFG80211=m +CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y CONFIG_WIRELESS_EXT=y +CONFIG_WEXT_CORE=y +CONFIG_WEXT_PROC=y +CONFIG_WEXT_SPY=y +CONFIG_WEXT_PRIV=y +CONFIG_CFG80211=m +# CONFIG_NL80211_TESTMODE is not set +# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +# CONFIG_CFG80211_REG_DEBUG is not set +CONFIG_CFG80211_DEFAULT_PS=y +# CONFIG_WIRELESS_OLD_REGULATORY is not set +CONFIG_CFG80211_WEXT=y +CONFIG_WIRELESS_EXT_SYSFS=y +CONFIG_LIB80211=m +CONFIG_LIB80211_CRYPT_WEP=m +CONFIG_LIB80211_CRYPT_CCMP=m +CONFIG_LIB80211_CRYPT_TKIP=m +# CONFIG_LIB80211_DEBUG is not set CONFIG_MAC80211=m -# CONFIG_MAC80211_DEBUG is not set -CONFIG_IEEE80211=m -# CONFIG_IEEE80211_DEBUG is not set -CONFIG_IEEE80211_CRYPT_WEP=m 
-CONFIG_IEEE80211_CRYPT_CCMP=m -CONFIG_IEEE80211_CRYPT_TKIP=m -CONFIG_IEEE80211_SOFTMAC=m -# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set +CONFIG_MAC80211_RC_PID=y +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUG_MENU is not set +# CONFIG_WIMAX is not set CONFIG_RFKILL=m +CONFIG_RFKILL_LEDS=y # CONFIG_NET_9P is not set # @@ -408,9 +545,13 @@ CONFIG_RFKILL=m # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_DEVTMPFS is not set CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_SYS_HYPERVISOR is not set CONFIG_CONNECTOR=m # CONFIG_MTD is not set @@ -423,14 +564,19 @@ CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m +# CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_OSD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_RAM is not set CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set CONFIG_ATA_OVER_ETH=m +# CONFIG_BLK_DEV_HD is not set # CONFIG_MISC_DEVICES is not set +CONFIG_EEPROM_93CX6=m +CONFIG_HAVE_IDE=y # CONFIG_IDE is not set # @@ -453,10 +599,6 @@ CONFIG_BLK_DEV_SR=m CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_CHR_DEV_SCH=m - -# -# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs -# # CONFIG_SCSI_MULTI_LUN is not set CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y @@ -471,11 +613,18 @@ CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SAS_HOST_SMP=y # CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set +# CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m # CONFIG_BLK_DEV_3W_XXXX_RAID is not set +CONFIG_SCSI_HPSA=m # CONFIG_SCSI_3W_9XXX is not set +CONFIG_SCSI_3W_SAS=m # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set # CONFIG_SCSI_AIC7XXX is not set @@ -483,11 +632,21 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_AIC79XX is not set CONFIG_SCSI_AIC94XX=m # CONFIG_AIC94XX_DEBUG is not set +CONFIG_SCSI_MVSAS=m +# CONFIG_SCSI_MVSAS_DEBUG is not set +CONFIG_SCSI_DPT_I2O=m +# CONFIG_SCSI_ADVANSYS is not set # CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set +CONFIG_SCSI_MPT2SAS=m +CONFIG_SCSI_MPT2SAS_MAX_SGE=128 +# CONFIG_SCSI_MPT2SAS_LOGGING is not set # CONFIG_SCSI_HPTIOP is not set +CONFIG_LIBFC=m +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_IPS is not set @@ -502,16 +661,31 @@ CONFIG_SCSI_QLOGIC_1280=y # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set +CONFIG_SCSI_PMCRAID=m +# CONFIG_SCSI_PM8001 is not set # CONFIG_SCSI_SRP is not set +CONFIG_SCSI_BFA_FC=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_SCSI_OSD_DPRINT_SENSE=1 +# CONFIG_SCSI_OSD_DEBUG is not set # CONFIG_ATA is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y 
CONFIG_MD_RAID10=m CONFIG_MD_RAID456=y -CONFIG_MD_RAID5_RESHAPE=y +# CONFIG_MULTICORE_RAID456 is not set +CONFIG_MD_RAID6_PQ=y +# CONFIG_ASYNC_RAID6_TEST is not set CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m @@ -519,36 +693,39 @@ CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_EMC=m -CONFIG_DM_MULTIPATH_RDAC=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# +CONFIG_DM_UEVENT=y # CONFIG_FUSION is not set -# CONFIG_FUSION_SPI is not set -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support # + +# +# You can enable one or both FireWire driver stacks. +# + +# +# The newer stack is recommended. +# # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y CONFIG_IFB=m # CONFIG_DUMMY is not set # CONFIG_BONDING is not set CONFIG_MACVLAN=m # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +CONFIG_VETH=m # CONFIG_ARCNET is not set -CONFIG_PHYLIB=m +CONFIG_PHYLIB=y # # MII PHY device drivers @@ -562,23 +739,51 @@ CONFIG_VITESSE_PHY=m CONFIG_SMSC_PHY=m # CONFIG_BROADCOM_PHY is not set CONFIG_ICPLUS_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_NATIONAL_PHY=m +CONFIG_STE10XP=m +CONFIG_LSI_ET1011C_PHY=m # CONFIG_FIXED_PHY is not set +CONFIG_MDIO_BITBANG=m CONFIG_NET_ETHERNET=y CONFIG_MII=y CONFIG_AX88796=m +CONFIG_AX88796_93CX6=y CONFIG_SGI_IOC3_ETH=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set +CONFIG_SMC91X=m # CONFIG_DM9000 is not set +CONFIG_ETHOC=m +CONFIG_SMSC911X=m +CONFIG_DNET=m # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not 
set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_NET_PCI is not set +CONFIG_B44=m +CONFIG_B44_PCI_AUTOSELECT=y +CONFIG_B44_PCICORE_AUTOSELECT=y +CONFIG_B44_PCI=y +CONFIG_KS8842=m +CONFIG_KS8851_MLL=m +CONFIG_ATL2=m CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set # CONFIG_E1000 is not set +CONFIG_E1000E=m +CONFIG_IP1000=m +CONFIG_IGB=m +CONFIG_IGBVF=m # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -588,24 +793,75 @@ CONFIG_NETDEV_1000=y # CONFIG_SKY2 is not set CONFIG_VIA_VELOCITY=m # CONFIG_TIGON3 is not set -# CONFIG_BNX2 is not set +CONFIG_BNX2=m +CONFIG_CNIC=m CONFIG_QLA3XXX=m # CONFIG_ATL1 is not set +CONFIG_ATL1E=m +CONFIG_ATL1C=m +CONFIG_JME=m CONFIG_NETDEV_10000=y +CONFIG_MDIO=m # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y CONFIG_CHELSIO_T3=m +CONFIG_ENIC=m +CONFIG_IXGBE=m # CONFIG_IXGB is not set # CONFIG_S2IO is not set +CONFIG_VXGE=m +# CONFIG_VXGE_DEBUG_TRACE_ALL is not set # CONFIG_MYRI10GE is not set CONFIG_NETXEN_NIC=m -# CONFIG_MLX4_CORE is not set +CONFIG_NIU=m +CONFIG_MLX4_EN=m +CONFIG_MLX4_CORE=m +# CONFIG_MLX4_DEBUG is not set +CONFIG_TEHUTI=m +CONFIG_BNX2X=m +CONFIG_QLGE=m +CONFIG_SFC=m +CONFIG_BE2NET=m # CONFIG_TR is not set - -# -# Wireless LAN -# -# CONFIG_WLAN_PRE80211 is not set -CONFIG_WLAN_80211=y +CONFIG_WLAN=y +CONFIG_LIBERTAS_THINFIRM=m +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m +CONFIG_PRISM54=m +CONFIG_RTL8180=m +CONFIG_ADM8211=m +# CONFIG_MAC80211_HWSIM is not set +CONFIG_MWL8K=m +CONFIG_ATH_COMMON=m +# CONFIG_ATH_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_ATH9K_HW=m +CONFIG_ATH9K_COMMON=m +CONFIG_ATH9K=m +CONFIG_B43=m +CONFIG_B43_PCI_AUTOSELECT=y +CONFIG_B43_PCICORE_AUTOSELECT=y +CONFIG_B43_PHY_LP=y +CONFIG_B43_LEDS=y +CONFIG_B43_HWRNG=y +# CONFIG_B43_DEBUG is not set +CONFIG_B43LEGACY=m 
+CONFIG_B43LEGACY_PCI_AUTOSELECT=y +CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y +CONFIG_B43LEGACY_LEDS=y +CONFIG_B43LEGACY_HWRNG=y +# CONFIG_B43LEGACY_DEBUG is not set +CONFIG_B43LEGACY_DMA=y +CONFIG_B43LEGACY_PIO=y +CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y +# CONFIG_B43LEGACY_DMA_MODE is not set +# CONFIG_B43LEGACY_PIO_MODE is not set +CONFIG_HOSTAP=m +CONFIG_HOSTAP_FIRMWARE=y +CONFIG_HOSTAP_FIRMWARE_NVRAM=y +CONFIG_HOSTAP_PLX=m +CONFIG_HOSTAP_PCI=m CONFIG_IPW2100=m CONFIG_IPW2100_MONITOR=y CONFIG_IPW2100_DEBUG=y @@ -615,38 +871,57 @@ CONFIG_IPW2200_RADIOTAP=y CONFIG_IPW2200_PROMISCUOUS=y CONFIG_IPW2200_QOS=y CONFIG_IPW2200_DEBUG=y +CONFIG_LIBIPW=m +# CONFIG_LIBIPW_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLWIFI_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWL4965=y +CONFIG_IWL5000=y +CONFIG_IWL3945=m +CONFIG_IWL3945_SPECTRUM_MEASUREMENT=y CONFIG_LIBERTAS=m # CONFIG_LIBERTAS_DEBUG is not set CONFIG_HERMES=m +# CONFIG_HERMES_CACHE_FW_ON_INIT is not set CONFIG_PLX_HERMES=m CONFIG_TMD_HERMES=m CONFIG_NORTEL_HERMES=m CONFIG_PCI_HERMES=m -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PRISM54=m -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_BCM43XX=m -CONFIG_BCM43XX_DEBUG=y -CONFIG_BCM43XX_DMA=y -CONFIG_BCM43XX_PIO=y -CONFIG_BCM43XX_DMA_AND_PIO_MODE=y -# CONFIG_BCM43XX_DMA_MODE is not set -# CONFIG_BCM43XX_PIO_MODE is not set +CONFIG_P54_COMMON=m +CONFIG_P54_PCI=m +CONFIG_P54_LEDS=y +CONFIG_RT2X00=m +CONFIG_RT2400PCI=m +CONFIG_RT2500PCI=m +CONFIG_RT61PCI=m +CONFIG_RT2800PCI_PCI=m +CONFIG_RT2800PCI=m +CONFIG_RT2800_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_HT=y +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2X00_LIB_CRYPTO=y +CONFIG_RT2X00_LIB_LEDS=y +# CONFIG_RT2X00_DEBUG is not set +CONFIG_WL12XX=m +CONFIG_WL1251=m + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_FDDI is not set # 
CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set # CONFIG_ISDN is not set # CONFIG_PHONE is not set @@ -664,13 +939,16 @@ CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=m CONFIG_SERIO_RAW=m +CONFIG_SERIO_ALTERA_PS2=m # CONFIG_GAMEPORT is not set # # Character devices # # CONFIG_VT is not set +CONFIG_DEVKMEM=y # CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_NOZOMI=m # # Serial drivers @@ -693,95 +971,258 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=m -# CONFIG_RTC is not set +CONFIG_HW_RANDOM_TIMERIOMEM=m # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -# CONFIG_DRM is not set # CONFIG_RAW_DRIVER is not set # CONFIG_TCG_TPM is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set +CONFIG_I2C=m +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_COMPAT=y +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=m +CONFIG_I2C_ALGOPCA=m # -# SPI support +# I2C Hardware Bus support # + +# +# PC SMBus host controller drivers +# +CONFIG_I2C_ALI1535=m +CONFIG_I2C_ALI1563=m +CONFIG_I2C_ALI15X3=m +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_I801=m +CONFIG_I2C_ISCH=m +CONFIG_I2C_PIIX4=m +CONFIG_I2C_NFORCE2=m +CONFIG_I2C_SIS5595=m +CONFIG_I2C_SIS630=m +CONFIG_I2C_SIS96X=m +CONFIG_I2C_VIA=m +CONFIG_I2C_VIAPRO=m + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +CONFIG_I2C_OCORES=m +CONFIG_I2C_SIMTEC=m + +# +# External I2C/SMBus adapter drivers +# +CONFIG_I2C_PARPORT_LIGHT=m +CONFIG_I2C_TAOS_EVM=m + +# +# Other I2C/SMBus bus drivers +# +CONFIG_I2C_PCA_PLATFORM=m +CONFIG_I2C_STUB=m + +# +# Miscellaneous 
I2C Chip support +# +CONFIG_SENSORS_TSL2550=m +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set + +# +# PPS support +# +CONFIG_PPS=m +# CONFIG_PPS_DEBUG is not set # CONFIG_W1 is not set # CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=m +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +CONFIG_SSB=m +CONFIG_SSB_SPROM=y +CONFIG_SSB_PCIHOST_POSSIBLE=y +CONFIG_SSB_PCIHOST=y +CONFIG_SSB_B43_PCI_BRIDGE=y +# CONFIG_SSB_SILENT is not set +# CONFIG_SSB_DEBUG is not set +CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y +CONFIG_SSB_DRIVER_PCICORE=y +# CONFIG_SSB_DRIVER_MIPS is not set # # Multifunction device drivers # +# CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_DAB is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_WM8400 is not set +CONFIG_MFD_WM8350=m +CONFIG_MFD_WM8350_I2C=m +CONFIG_MFD_PCF50633=m +CONFIG_PCF50633_ADC=m +CONFIG_PCF50633_GPIO=m +CONFIG_AB3100_CORE=m +CONFIG_AB3100_OTP=m +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set # # Graphics support # +# CONFIG_VGA_ARB is not set +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_VIDEO_OUTPUT_CONTROL is not set -# CONFIG_FB is not set - -# -# Sound -# # CONFIG_SOUND is not set CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_USB is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk 
support' +# Enable Host or Gadget support to see Inventra options # # -# USB Gadget Support +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set -# CONFIG_NEW_LEDS is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m + +# +# LED drivers +# +CONFIG_LEDS_LP3944=m +CONFIG_LEDS_PCA955X=m +CONFIG_LEDS_WM8350=m +CONFIG_LEDS_BD2802=m + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=m +CONFIG_LEDS_TRIGGER_HEARTBEAT=m +CONFIG_LEDS_TRIGGER_BACKLIGHT=m +CONFIG_LEDS_TRIGGER_DEFAULT_ON=m + +# +# iptables trigger is under Netfilter config (LED target) +# +# CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set -# CONFIG_RTC_CLASS is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set # -# DMA Engine support +# RTC interfaces # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# DMA Clients +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_BQ32K is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set +# CONFIG_RTC_DRV_RX8025 is not set + +# +# SPI RTC drivers # # -# DMA Devices +# Platform RTC drivers # +# CONFIG_RTC_DRV_CMOS is not set +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is 
not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +CONFIG_RTC_DRV_M48T35=y +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_MSM6242 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_RP5C01 is not set +# CONFIG_RTC_DRV_V3020 is not set +# CONFIG_RTC_DRV_WM8350 is not set +# CONFIG_RTC_DRV_PCF50633 is not set +CONFIG_RTC_DRV_AB3100=m # -# Userspace I/O +# on-CPU RTC drivers # +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set CONFIG_UIO=y # CONFIG_UIO_CIF is not set +# CONFIG_UIO_PDRV is not set +# CONFIG_UIO_PDRV_GENIRQ is not set +CONFIG_UIO_SMX=m +CONFIG_UIO_AEC=m +CONFIG_UIO_SERCOS3=m +CONFIG_UIO_PCI_GENERIC=m + +# +# TI VLYNQ +# +# CONFIG_STAGING is not set # # File systems @@ -792,35 +1233,57 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_EXT4_DEBUG is not set CONFIG_JBD=y -CONFIG_JBD_DEBUG=y +CONFIG_JBD2=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y -CONFIG_XFS_SECURITY=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set +# CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +# CONFIG_NILFS2_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set +CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QUOTACTL=y -CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m # CONFIG_AUTOFS4_FS is not set CONFIG_FUSE_FS=m +CONFIG_CUSE=m CONFIG_GENERIC_ACL=y +# +# Caches +# +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y 
+# CONFIG_FSCACHE_HISTOGRAM is not set +# CONFIG_FSCACHE_DEBUG is not set +# CONFIG_FSCACHE_OBJECT_LIST is not set +CONFIG_CACHEFILES=m +# CONFIG_CACHEFILES_DEBUG is not set +# CONFIG_CACHEFILES_HISTOGRAM is not set + # # CD-ROM/DVD Filesystems # @@ -840,16 +1303,13 @@ CONFIG_GENERIC_ACL=y CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y CONFIG_CONFIGFS_FS=m - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -859,28 +1319,32 @@ CONFIG_CONFIGFS_FS=m # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +CONFIG_SQUASHFS=m +# CONFIG_SQUASHFS_EMBEDDED is not set +CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +CONFIG_OMFS_FS=m # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_EXOFS_FS=m +# CONFIG_EXOFS_DEBUG is not set +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set # CONFIG_ROOT_NFS is not set +# CONFIG_NFSD is not set CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_BIND34 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -910,35 +1374,37 @@ CONFIG_SGI_PARTITION=y # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set - -# -# Native Language Support -# # CONFIG_NLS is not set - -# -# Distributed Lock Manager -# CONFIG_DLM=m # CONFIG_DLM_DEBUG is not set -# -# Profiling support -# -# CONFIG_PROFILING is not set - # # 
Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=2048 # CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_CROSSCOMPILE=y +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +CONFIG_EARLY_PRINTK=y # CONFIG_CMDLINE_BOOL is not set # @@ -947,65 +1413,140 @@ CONFIG_CROSSCOMPILE=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y # CONFIG_SECURITY is not set -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m +CONFIG_SECURITYFS=y +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_SMACK is not set +# CONFIG_DEFAULT_SECURITY_TOMOYO is not set +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" +CONFIG_XOR_BLOCKS=y +CONFIG_ASYNC_CORE=y +CONFIG_ASYNC_MEMCPY=y +CONFIG_ASYNC_XOR=y +CONFIG_ASYNC_PQ=y +CONFIG_ASYNC_RAID6_RECOV=y CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ABLKCIPHER=m +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +CONFIG_CRYPTO_GF128MUL=m +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_WORKQUEUE=y +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated 
Encryption with Associated Data +# +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_SEQIV=m + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CTR=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m + +# +# Hash modes +# CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_VMAC=m + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=m +CONFIG_CRYPTO_GHASH=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_GF128MUL=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_WP512=m + +# +# Ciphers +# CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m + +# +# Compression +# CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CAMELLIA=m -# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m + +# +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_HW=y +CONFIG_CRYPTO_DEV_HIFN_795X=m +# CONFIG_CRYPTO_DEV_HIFN_795X_RNG is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=m -# 
CONFIG_CRC16 is not set -# CONFIG_CRC_ITU_T is not set +CONFIG_CRC16=y +CONFIG_CRC_T10DIF=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y -# CONFIG_CRC7 is not set +CONFIG_CRC7=m CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=m CONFIG_ZLIB_DEFLATE=m -CONFIG_PLIST=y +CONFIG_LZO_COMPRESS=m +CONFIG_LZO_DECOMPRESS=m CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y +CONFIG_NLATTR=y From 63731c964d6cd9de4800891bd33b6f9e47a249bc Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 4 Feb 2010 15:48:49 -0800 Subject: [PATCH 404/640] MIPS: Fix __devinit __cpuinit confusion in cpu_cache_init cpu_cache_init and the things it calls should all be __cpuinit instead of __devinit. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/938/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-octeon.c | 4 ++-- arch/mips/mm/cache.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 94e05e5733c1..e06f1af760a7 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -174,7 +174,7 @@ static void octeon_flush_cache_page(struct vm_area_struct *vma, * Probe Octeon's caches * */ -static void __devinit probe_octeon(void) +static void __cpuinit probe_octeon(void) { unsigned long icache_size; unsigned long dcache_size; @@ -235,7 +235,7 @@ static void __devinit probe_octeon(void) * Setup the Octeon cache flush routines * */ -void __devinit octeon_cache_init(void) +void __cpuinit octeon_cache_init(void) { extern unsigned long ebase; extern char except_vec2_octeon; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 102b2dfa542a..e716cafc346d 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -155,7 +155,7 @@ static inline void setup_protection_map(void) protection_map[15] = PAGE_SHARED; } -void __devinit cpu_cache_init(void) +void __cpuinit cpu_cache_init(void) { if (cpu_has_3k_cache) { extern void __weak r3k_cache_init(void); From 
c2d5b5e525a354987b9c3de3661133f982bf9ba0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 6 Feb 2010 09:42:16 +0100 Subject: [PATCH 405/640] MIPS: SNI: Correct NULL test Test the value that was just allocated rather than the previously tested one. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ expression *x; expression e; identifier l; @@ if (x == NULL || ...) { ... when forall return ...; } ... when != goto l; when != x = e when != &x *x == NULL // Signed-off-by: Julia Lawall To: linux-mips@linux-mips.org To: linux-kernel@vger.kernel.org To: kernel-janitors@vger.kernel.org Patchwork: http://patchwork.linux-mips.org/patch/945/ Acked-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/sni/rm200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c index 46f00691f448..31e2583ec622 100644 --- a/arch/mips/sni/rm200.c +++ b/arch/mips/sni/rm200.c @@ -404,7 +404,7 @@ void __init sni_rm200_i8259_irqs(void) if (!rm200_pic_master) return; rm200_pic_slave = ioremap_nocache(0x160000a0, 4); - if (!rm200_pic_master) { + if (!rm200_pic_slave) { iounmap(rm200_pic_master); return; } From 5b7efa898b357e6ebe4024c520e62024eb969b5f Mon Sep 17 00:00:00 2001 From: David Daney Date: Mon, 8 Feb 2010 12:27:00 -0800 Subject: [PATCH 406/640] MIPS: Don't probe reserved EntryHi bits. The patch that adds cpu_probe_vmbits is erroneously writing to reserved bit 12. Since we are really only probing high bits, don't write this bit with a one. 
Signed-off-by: David Daney To: linux-mips@linux-mips.org Cc: Guenter Roeck Patchwork: http://patchwork.linux-mips.org/patch/949/ Acked-by: Guenter Roeck Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9c187a64649b..758ad426c57f 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -287,9 +287,9 @@ static inline int __cpu_has_fpu(void) static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) { #ifdef __NEED_VMBITS_PROBE - write_c0_entryhi(0x3ffffffffffff000ULL); + write_c0_entryhi(0x3fffffffffffe000ULL); back_to_back_c0_hazard(); - c->vmbits = fls64(read_c0_entryhi() & 0x3ffffffffffff000ULL); + c->vmbits = fls64(read_c0_entryhi() & 0x3fffffffffffe000ULL); #endif } From 99fcb766a3a50466fe31d743260a3400c1aee855 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 7 Feb 2010 16:20:18 +0100 Subject: [PATCH 407/640] drm/i915: Update write_domains on active list after flush. Before changing the status of a buffer with a pending write we will await upon a new flush for that buffer. So we can take advantage of any flushes posted whilst the buffer is active and pending processing by the GPU, by clearing its write_domain and updating its last_rendering_seqno -- thus saving a potential flush in deep queues and improving flushing behaviour upon eviction for both GTT space and fences. In order to reduce the time spent searching the active list for matching write_domains, we move those to a separate list whose elements are the buffers belonging to the active/flushing list with pending writes. Original patch by Chris Wilson , forward-ported by me. In addition to better performance, this also fixes a real bug. Before this change, i915_gem_evict_everything didn't work as advertised.
When the gpu was actually busy and processing requests, the flush and subsequent wait would not move active and dirty buffers to the inactive list, but just to the flushing list, which triggered the BUG_ON at the end of this function. With the tighter dirty buffer tracking, all currently busy and dirty buffers get moved to the inactive list by one i915_gem_flush operation. I've left the BUG_ON I've used to prove this in there. References: Bug 25911 - 2.10.0 causes kernel oops and system hangs http://bugs.freedesktop.org/show_bug.cgi?id=25911 Bug 26101 - [i915] xf86-video-intel 2.10.0 (and git) triggers kernel oops within seconds after login http://bugs.freedesktop.org/show_bug.cgi?id=26101 Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson Tested-by: Adam Lantos Cc: stable@kernel.org Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index aaf934d96f21..b99b6a841d95 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -492,6 +492,15 @@ typedef struct drm_i915_private { */ struct list_head flushing_list; + /** + * List of objects currently pending a GPU write flush. + * + * All elements on this list will belong to either the + * active_list or flushing_list, last_rendering_seqno can + * be used to differentiate between the two elements. + */ + struct list_head gpu_write_list; + /** * LRU list of objects which are not in the ringbuffer and * are ready to unbind, but are still in the GTT.
@@ -592,6 +601,8 @@ struct drm_i915_gem_object { /** This object's place on the active/flushing/inactive lists */ struct list_head list; + /** This object's place on GPU write list */ + struct list_head gpu_write_list; /** This object's place on the fenced object LRU */ struct list_head fence_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b4c8c0230689..11daa618385f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1552,6 +1552,8 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj) else list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); + BUG_ON(!list_empty(&obj_priv->gpu_write_list)); + obj_priv->last_rendering_seqno = 0; if (obj_priv->active) { obj_priv->active = 0; @@ -1622,7 +1624,8 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, struct drm_i915_gem_object *obj_priv, *next; list_for_each_entry_safe(obj_priv, next, - &dev_priv->mm.flushing_list, list) { + &dev_priv->mm.gpu_write_list, + gpu_write_list) { struct drm_gem_object *obj = obj_priv->obj; if ((obj->write_domain & flush_domains) == @@ -1630,6 +1633,7 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, uint32_t old_write_domain = obj->write_domain; obj->write_domain = 0; + list_del_init(&obj_priv->gpu_write_list); i915_gem_object_move_to_active(obj, seqno); trace_i915_gem_object_change_domain(obj, @@ -2084,8 +2088,8 @@ static int i915_gem_evict_everything(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t seqno; int ret; + uint32_t seqno; bool lists_empty; spin_lock(&dev_priv->mm.active_list_lock); @@ -2107,6 +2111,8 @@ i915_gem_evict_everything(struct drm_device *dev) if (ret) return ret; + BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); + ret = i915_gem_evict_from_inactive_list(dev); if (ret) return ret; @@ -2701,7 +2707,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) old_write_domain = 
obj->write_domain; i915_gem_flush(dev, 0, obj->write_domain); seqno = i915_add_request(dev, NULL, obj->write_domain); - obj->write_domain = 0; + BUG_ON(obj->write_domain); i915_gem_object_move_to_active(obj, seqno); trace_i915_gem_object_change_domain(obj, @@ -3850,16 +3856,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_flush(dev, dev->invalidate_domains, dev->flush_domains); - if (dev->flush_domains) + if (dev->flush_domains & I915_GEM_GPU_DOMAINS) (void)i915_add_request(dev, file_priv, dev->flush_domains); } for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; + struct drm_i915_gem_object *obj_priv = obj->driver_private; uint32_t old_write_domain = obj->write_domain; obj->write_domain = obj->pending_write_domain; + if (obj->write_domain) + list_move_tail(&obj_priv->gpu_write_list, + &dev_priv->mm.gpu_write_list); + else + list_del_init(&obj_priv->gpu_write_list); + trace_i915_gem_object_change_domain(obj, obj->read_domains, old_write_domain); @@ -4370,6 +4383,7 @@ int i915_gem_init_object(struct drm_gem_object *obj) obj_priv->obj = obj; obj_priv->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj_priv->list); + INIT_LIST_HEAD(&obj_priv->gpu_write_list); INIT_LIST_HEAD(&obj_priv->fence_list); obj_priv->madv = I915_MADV_WILLNEED; @@ -4821,6 +4835,7 @@ i915_gem_load(struct drm_device *dev) spin_lock_init(&dev_priv->mm.active_list_lock); INIT_LIST_HEAD(&dev_priv->mm.active_list); INIT_LIST_HEAD(&dev_priv->mm.flushing_list); + INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); INIT_LIST_HEAD(&dev_priv->mm.request_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); From fd2e8ea597222b8f38ae8948776a61ea7958232e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Feb 2010 14:14:36 +0000 Subject: [PATCH 408/640] drm/i915: Increase fb alignment to 64k An untiled framebuffer must be aligned to 64k. 
This is normally handled by intel_pin_and_fence_fb_obj(), but the intelfb_create() likes to be different and do the pinning itself. However, it aligns the buffer object incorrectly for pre-i965 chipsets causing a PGTBL_ERR when it is installed onto the output. Fixes: KMS error message while initializing modesetting - render error detected: EIR: 0x10 [i915] http://bugs.freedesktop.org/show_bug.cgi?id=22936 Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 371d753e362b..aaabbcbe5905 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -148,7 +148,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_pin(fbo, PAGE_SIZE); + ret = i915_gem_object_pin(fbo, 64*1024); if (ret) { DRM_ERROR("failed to pin fb: %d\n", ret); goto out_unref; From ee25df2bc379728c45d81e04cf87984db1425edf Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Sat, 6 Feb 2010 10:41:53 -0800 Subject: [PATCH 409/640] drm/i915: handle FBC and self-refresh better On 945, we need to avoid entering self-refresh if the compressor is busy, or we may cause display FIFO underruns leading to ugly flicker. Fixes fdo bug #24314, kernel bug #15043. 
Tested-by: Alexander Lam Signed-off-by: Jesse Barnes Tested-by: Julien Cristau (fd.o #25371) Cc: stable@kernel.org Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_display.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 847006c5218e..ab1bd2d3d3b6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -338,6 +338,7 @@ #define FBC_CTL_PERIODIC (1<<30) #define FBC_CTL_INTERVAL_SHIFT (16) #define FBC_CTL_UNCOMPRESSIBLE (1<<14) +#define FBC_C3_IDLE (1<<13) #define FBC_CTL_STRIDE_SHIFT (5) #define FBC_CTL_FENCENO (1<<0) #define FBC_COMMAND 0x0320c diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7e9c835f9ae0..a4d382c8bf58 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1031,6 +1031,8 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) /* enable it... */ fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC; + if (IS_I945GM(dev)) + fbc_ctl |= FBC_C3_IDLE; /* 945 needs special SR handling */ fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT; if (obj_priv->tiling_mode != I915_TILING_NONE) From b1b87f6b65a770a69f3632cf7c1f9182547c1249 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 26 Jan 2010 14:40:05 -0800 Subject: [PATCH 410/640] drm/i915: untangle page flip completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a new page flip is requested, we need to both queue an unpin for the current framebuffer, and also increment the flip pending count on the newly submitted buffer. At flip finish time, we need to unpin the old fb and decrement the flip pending count on the new buffer. 
The old code was conflating the two, and led to hangs when new direct rendered apps were started, replacing the existing frame buffer. This patch splits out the buffers and prevents the hangs. Signed-off-by: Jesse Barnes Reviewed-by: Chris Wilson Reviewed-by: Kristian Høgsberg Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a4d382c8bf58..dc6ffe82d2cd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4081,7 +4081,8 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) struct intel_unpin_work { struct work_struct work; struct drm_device *dev; - struct drm_gem_object *obj; + struct drm_gem_object *old_fb_obj; + struct drm_gem_object *pending_flip_obj; struct drm_pending_vblank_event *event; int pending; }; @@ -4092,8 +4093,8 @@ static void intel_unpin_work_fn(struct work_struct *__work) container_of(__work, struct intel_unpin_work, work); mutex_lock(&work->dev->struct_mutex); - i915_gem_object_unpin(work->obj); - drm_gem_object_unreference(work->obj); + i915_gem_object_unpin(work->old_fb_obj); + drm_gem_object_unreference(work->old_fb_obj); mutex_unlock(&work->dev->struct_mutex); kfree(work); } @@ -4117,7 +4118,7 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe) work = intel_crtc->unpin_work; if (work == NULL || !work->pending) { if (work && !work->pending) { - obj_priv = work->obj->driver_private; + obj_priv = work->pending_flip_obj->driver_private; DRM_DEBUG_DRIVER("flip finish: %p (%d) not pending?\n", obj_priv, atomic_read(&obj_priv->pending_flip)); @@ -4142,7 +4143,7 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev->event_lock, flags); - obj_priv = work->obj->driver_private; + obj_priv = work->pending_flip_obj->driver_private; /* Initial scanout buffer will have a 0 pending 
flip count */ if ((atomic_read(&obj_priv->pending_flip) == 0) || @@ -4191,7 +4192,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->dev = crtc->dev; intel_fb = to_intel_framebuffer(crtc->fb); - work->obj = intel_fb->obj; + work->old_fb_obj = intel_fb->obj; INIT_WORK(&work->work, intel_unpin_work_fn); /* We borrow the event spin lock for protecting unpin_work */ @@ -4220,13 +4221,14 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } /* Reference the old fb object for the scheduled work. */ - drm_gem_object_reference(work->obj); + drm_gem_object_reference(work->old_fb_obj); crtc->fb = fb; i915_gem_object_flush_write_domain(obj); drm_vblank_get(dev, intel_crtc->pipe); obj_priv = obj->driver_private; atomic_inc(&obj_priv->pending_flip); + work->pending_flip_obj = obj; BEGIN_LP_RING(4); OUT_RING(MI_DISPLAY_FLIP | From f072d2e77128c5b332ce217764cf170b660b99dc Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 9 Feb 2010 09:46:19 +0800 Subject: [PATCH 411/640] drm/i915: fix flip done interrupt on Ironlake On Ironlake plane flip interrupt means flip done event already, the behavior is not like old chips, and perform like other usual interrupt. So only need to handle flip done event when receiving that interrupt. 
Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_irq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 50ddf4a95c5e..a17d6bdfe63e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -309,22 +309,22 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev) if (de_iir & DE_GSE) ironlake_opregion_gse_intr(dev); - if (de_iir & DE_PLANEA_FLIP_DONE) + if (de_iir & DE_PLANEA_FLIP_DONE) { intel_prepare_page_flip(dev, 0); - - if (de_iir & DE_PLANEB_FLIP_DONE) - intel_prepare_page_flip(dev, 1); - - if (de_iir & DE_PIPEA_VBLANK) { - drm_handle_vblank(dev, 0); intel_finish_page_flip(dev, 0); } - if (de_iir & DE_PIPEB_VBLANK) { - drm_handle_vblank(dev, 1); + if (de_iir & DE_PLANEB_FLIP_DONE) { + intel_prepare_page_flip(dev, 1); intel_finish_page_flip(dev, 1); } + if (de_iir & DE_PIPEA_VBLANK) + drm_handle_vblank(dev, 0); + + if (de_iir & DE_PIPEB_VBLANK) + drm_handle_vblank(dev, 1); + /* check event from PCH */ if ((de_iir & DE_PCH_EVENT) && (pch_iir & SDE_HOTPLUG_MASK)) { From aacef09b59e99d9e919ede74d107d5d7f3721432 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 9 Feb 2010 09:46:20 +0800 Subject: [PATCH 412/640] drm/i915: fix pipe source image setting in flip command The MI_DISPLAY_FLIP command needs to be given the same pipe source image as the pipe source register, i.e. the source image size minus one. This fixes a screen corruption issue on Ironlake.
Signed-off-by: Zhenyu Wang Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dc6ffe82d2cd..c161ace7132d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4180,7 +4180,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; unsigned long flags; - int ret; + int pipesrc_reg = (intel_crtc->pipe == 0) ? PIPEASRC : PIPEBSRC; + int ret, pipesrc; RING_LOCALS; work = kzalloc(sizeof *work, GFP_KERNEL); @@ -4236,7 +4237,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, OUT_RING(fb->pitch); if (IS_I965G(dev)) { OUT_RING(obj_priv->gtt_offset | obj_priv->tiling_mode); - OUT_RING((fb->width << 16) | fb->height); + pipesrc = I915_READ(pipesrc_reg); + OUT_RING(pipesrc & 0x0fff0fff); } else { OUT_RING(obj_priv->gtt_offset); OUT_RING(MI_NOOP); From a40e8d3139e9eb54bf1d29f91639a6c5e05f652e Mon Sep 17 00:00:00 2001 From: Owain Ainsworth Date: Tue, 9 Feb 2010 14:25:55 +0000 Subject: [PATCH 413/640] drm/i915: Correctly return -ENOMEM on allocation failure in cmdbuf ioctls. Signed-off-by: Owain G. 
Ainsworth Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 4 +++- drivers/gpu/drm/i915/i915_gem.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e660ac07f3b2..2307f98349f7 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -735,8 +735,10 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, if (cmdbuf->num_cliprects) { cliprects = kcalloc(cmdbuf->num_cliprects, sizeof(struct drm_clip_rect), GFP_KERNEL); - if (cliprects == NULL) + if (cliprects == NULL) { + ret = -ENOMEM; goto fail_batch_free; + } ret = copy_from_user(cliprects, cmdbuf->cliprects, cmdbuf->num_cliprects * diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 11daa618385f..ec8a0d7ffa39 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3688,8 +3688,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->num_cliprects != 0) { cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), GFP_KERNEL); - if (cliprects == NULL) + if (cliprects == NULL) { + ret = -ENOMEM; goto pre_mutex_err; + } ret = copy_from_user(cliprects, (struct drm_clip_rect __user *) From dd19e44b28b12f7ea59ebb54d8ea18054da7f9d1 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sat, 30 Jan 2010 15:41:00 +0100 Subject: [PATCH 414/640] drm/nouveau: move dereferences after null checks Reported-by: Dan Carpenter Signed-off-by: Marcin Slusarz Signed-off-by: Maarten Maathuis Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_connector.c | 7 ++++--- drivers/gpu/drm/nouveau/nouveau_object.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_sgdma.c | 7 ++++--- drivers/gpu/drm/nouveau/nv50_crtc.c | 11 +++++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 7e6d673f3a23..d2f63353ea97 
100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -88,13 +88,14 @@ nouveau_connector_destroy(struct drm_connector *drm_connector) { struct nouveau_connector *nv_connector = nouveau_connector(drm_connector); - struct drm_device *dev = nv_connector->base.dev; - - NV_DEBUG_KMS(dev, "\n"); + struct drm_device *dev; if (!nv_connector) return; + dev = nv_connector->base.dev; + NV_DEBUG_KMS(dev, "\n"); + kfree(nv_connector->edid); drm_sysfs_connector_remove(drm_connector); drm_connector_cleanup(drm_connector); diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c index 6c2cf81716df..e7c100ba63a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_object.c +++ b/drivers/gpu/drm/nouveau/nouveau_object.c @@ -885,11 +885,12 @@ int nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class, struct nouveau_gpuobj **gpuobj_ret) { - struct drm_nouveau_private *dev_priv = chan->dev->dev_private; + struct drm_nouveau_private *dev_priv; struct nouveau_gpuobj *gpuobj; if (!chan || !gpuobj_ret || *gpuobj_ret != NULL) return -EINVAL; + dev_priv = chan->dev->dev_private; gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL); if (!gpuobj) diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index 4c7f1e403e80..ed1590577b6c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -54,11 +54,12 @@ static void nouveau_sgdma_clear(struct ttm_backend *be) { struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be; - struct drm_device *dev = nvbe->dev; - - NV_DEBUG(nvbe->dev, "\n"); + struct drm_device *dev; if (nvbe && nvbe->pages) { + dev = nvbe->dev; + NV_DEBUG(dev, "\n"); + if (nvbe->bound) be->func->unbind(be); diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c index 40b7360841f8..d1a651e3400c 100644 --- a/drivers/gpu/drm/nouveau/nv50_crtc.c +++ 
b/drivers/gpu/drm/nouveau/nv50_crtc.c @@ -298,14 +298,17 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk) static void nv50_crtc_destroy(struct drm_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - - NV_DEBUG_KMS(dev, "\n"); + struct drm_device *dev; + struct nouveau_crtc *nv_crtc; if (!crtc) return; + dev = crtc->dev; + nv_crtc = nouveau_crtc(crtc); + + NV_DEBUG_KMS(dev, "\n"); + drm_crtc_cleanup(&nv_crtc->base); nv50_cursor_fini(nv_crtc); From 13876c6e5fec94e9ea51b73ac025583dd7655345 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Feb 2010 20:50:34 +1000 Subject: [PATCH 415/640] nouveau: fix state detection with switchable graphics Signed-off-by: Matthew Garrett Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 1cf488247a16..48227e744753 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -90,21 +90,21 @@ int nouveau_hybrid_setup(struct drm_device *dev) { int result; - if (nouveau_dsm(dev, NOUVEAU_DSM_ACTIVE, NOUVEAU_DSM_ACTIVE_QUERY, + if (nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STATE, &result)) return -ENODEV; NV_INFO(dev, "_DSM hardware status gave 0x%x\n", result); - if (result & 0x1) { /* Stamina mode - disable the external GPU */ + if (result) { /* Ensure that the external GPU is enabled */ + nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL); + nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED, + NULL); + } else { /* Stamina mode - disable the external GPU */ nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_STAMINA, NULL); nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STAMINA, NULL); - } else { /* Ensure that the external GPU is enabled */ - nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, 
NULL); - nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED, - NULL); } return 0; From 6719fc663c6cd30da5dd02d08aaefb031a7a98fd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 9 Feb 2010 12:31:08 +1000 Subject: [PATCH 416/640] drm/radeon/kms: fix screen clearing before fbcon. This memset_io was added to debug something way back and got left behind; memset the fb to black so the borders aren't all white. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 66055b3d8668..23aa393634f2 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -248,7 +248,7 @@ int radeonfb_create(struct drm_device *dev, if (ret) goto out_unref; - memset_io(fbptr, 0xff, aligned_size); + memset_io(fbptr, 0x0, aligned_size); strcpy(info->fix.id, "radeondrmfb"); From 84b79f8d2882b0a84330c04839ed4d3cefd2ff77 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 7 Feb 2010 21:48:24 +0100 Subject: [PATCH 417/640] drm/i915: Fix crash while aborting hibernation Commit cbda12d77ea590082edb6d30bd342a67ebc459e0 (drm/i915: implement new pm ops for i915) introduced the problem that if s2disk hibernation is aborted, the system will crash, because i915_pm_thaw() does nothing, while it should at least reverse some operations carried out by i915_suspend(). Fix this issue by splitting the i915 suspend into a freeze part and a suspend part, where the latter is not executed before creating a hibernation image, and the i915 resume into a "low-level" resume part and a thaw part, where the former is not executed after the image has been created. Signed-off-by: Rafael J.
Wysocki Tested-by: Alan Jenkins Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.c | 180 +++++++++++++++++++------------- 1 file changed, 107 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ecac882e1d54..79beffcf5936 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -174,12 +174,42 @@ const static struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); #endif -static int i915_suspend(struct drm_device *dev, pm_message_t state) +static int i915_drm_freeze(struct drm_device *dev) +{ + pci_save_state(dev->pdev); + + /* If KMS is active, we do the leavevt stuff here */ + if (drm_core_check_feature(dev, DRIVER_MODESET)) { + int error = i915_gem_idle(dev); + if (error) { + dev_err(&dev->pdev->dev, + "GEM idle failed, resume might fail\n"); + return error; + } + drm_irq_uninstall(dev); + } + + i915_save_state(dev); + + return 0; +} + +static void i915_drm_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (!dev || !dev_priv) { - DRM_ERROR("dev: %p, dev_priv: %p\n", dev, dev_priv); + intel_opregion_free(dev, 1); + + /* Modeset on resume, not lid events */ + dev_priv->modeset_on_lid = 0; +} + +static int i915_suspend(struct drm_device *dev, pm_message_t state) +{ + int error; + + if (!dev || !dev->dev_private) { + DRM_ERROR("dev: %p\n", dev); DRM_ERROR("DRM not initialized, aborting suspend.\n"); return -ENODEV; } @@ -187,19 +217,11 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state) if (state.event == PM_EVENT_PRETHAW) return 0; - pci_save_state(dev->pdev); + error = i915_drm_freeze(dev); + if (error) + return error; - /* If KMS is active, we do the leavevt stuff here */ - if (drm_core_check_feature(dev, DRIVER_MODESET)) { - if (i915_gem_idle(dev)) - dev_err(&dev->pdev->dev, - "GEM idle failed, resume may fail\n"); - drm_irq_uninstall(dev); - } - - i915_save_state(dev); - - 
intel_opregion_free(dev, 1); + i915_drm_suspend(dev); if (state.event == PM_EVENT_SUSPEND) { /* Shut down the device */ @@ -207,45 +229,45 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state) pci_set_power_state(dev->pdev, PCI_D3hot); } - /* Modeset on resume, not lid events */ - dev_priv->modeset_on_lid = 0; - return 0; } -static int i915_resume(struct drm_device *dev) +static int i915_drm_thaw(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; - - if (pci_enable_device(dev->pdev)) - return -1; - pci_set_master(dev->pdev); - - i915_restore_state(dev); - - intel_opregion_init(dev, 1); + int error = 0; /* KMS EnterVT equivalent */ if (drm_core_check_feature(dev, DRIVER_MODESET)) { mutex_lock(&dev->struct_mutex); dev_priv->mm.suspended = 0; - ret = i915_gem_init_ringbuffer(dev); - if (ret != 0) - ret = -1; + error = i915_gem_init_ringbuffer(dev); mutex_unlock(&dev->struct_mutex); drm_irq_install(dev); - } - if (drm_core_check_feature(dev, DRIVER_MODESET)) { + /* Resume the modeset for every activated CRTC */ drm_helper_resume_force_mode(dev); } dev_priv->modeset_on_lid = 0; - return ret; + return error; +} + +static int i915_resume(struct drm_device *dev) +{ + if (pci_enable_device(dev->pdev)) + return -EIO; + + pci_set_master(dev->pdev); + + i915_restore_state(dev); + + intel_opregion_init(dev, 1); + + return i915_drm_thaw(dev); } /** @@ -386,57 +408,69 @@ i915_pci_remove(struct pci_dev *pdev) drm_put_dev(dev); } -static int -i915_pci_suspend(struct pci_dev *pdev, pm_message_t state) +static int i915_pm_suspend(struct device *dev) { - struct drm_device *dev = pci_get_drvdata(pdev); + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + int error; - return i915_suspend(dev, state); -} + if (!drm_dev || !drm_dev->dev_private) { + dev_err(dev, "DRM not initialized, aborting suspend.\n"); + return -ENODEV; + } -static int -i915_pci_resume(struct pci_dev *pdev) -{ - 
struct drm_device *dev = pci_get_drvdata(pdev); + error = i915_drm_freeze(drm_dev); + if (error) + return error; - return i915_resume(dev); -} + i915_drm_suspend(drm_dev); -static int -i915_pm_suspend(struct device *dev) -{ - return i915_pci_suspend(to_pci_dev(dev), PMSG_SUSPEND); -} + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); -static int -i915_pm_resume(struct device *dev) -{ - return i915_pci_resume(to_pci_dev(dev)); -} - -static int -i915_pm_freeze(struct device *dev) -{ - return i915_pci_suspend(to_pci_dev(dev), PMSG_FREEZE); -} - -static int -i915_pm_thaw(struct device *dev) -{ - /* thaw during hibernate, do nothing! */ return 0; } -static int -i915_pm_poweroff(struct device *dev) +static int i915_pm_resume(struct device *dev) { - return i915_pci_suspend(to_pci_dev(dev), PMSG_HIBERNATE); + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + + return i915_resume(drm_dev); } -static int -i915_pm_restore(struct device *dev) +static int i915_pm_freeze(struct device *dev) { - return i915_pci_resume(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + + if (!drm_dev || !drm_dev->dev_private) { + dev_err(dev, "DRM not initialized, aborting suspend.\n"); + return -ENODEV; + } + + return i915_drm_freeze(drm_dev); +} + +static int i915_pm_thaw(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + + return i915_drm_thaw(drm_dev); +} + +static int i915_pm_poweroff(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + int error; + + error = i915_drm_freeze(drm_dev); + if (!error) + i915_drm_suspend(drm_dev); + + return error; } const struct dev_pm_ops i915_pm_ops = { @@ -445,7 +479,7 @@ const struct dev_pm_ops i915_pm_ops = { .freeze = i915_pm_freeze, .thaw = i915_pm_thaw, .poweroff = i915_pm_poweroff, - .restore 
= i915_pm_restore, + .restore = i915_pm_resume, }; static struct vm_operations_struct i915_gem_vm_ops = { From 75dfca80a610e4e87d3b9ccfb3f520692808697d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 10 Feb 2010 15:09:44 -0800 Subject: [PATCH 418/640] drm/i915: hold ref on flip object until it completes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will prevent things from falling over if the user frees the flip buffer before we complete the flip, since we'll hold an internal reference. Reported-by: Kristian Høgsberg Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c161ace7132d..b27202d23ebc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4094,6 +4094,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) mutex_lock(&work->dev->struct_mutex); i915_gem_object_unpin(work->old_fb_obj); + drm_gem_object_unreference(work->pending_flip_obj); drm_gem_object_unreference(work->old_fb_obj); mutex_unlock(&work->dev->struct_mutex); kfree(work); @@ -4221,8 +4222,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, return ret; } - /* Reference the old fb object for the scheduled work. */ + /* Reference the objects for the scheduled work. */ drm_gem_object_reference(work->old_fb_obj); + drm_gem_object_reference(obj); crtc->fb = fb; i915_gem_object_flush_write_domain(obj); From f77cef3db357aeea22d82a2aa4f0ef8fbae41d47 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 9 Feb 2010 19:41:55 +0000 Subject: [PATCH 419/640] drm/vmwgfx: Update the user-space interface. When time-based throttling is implemented, we need to bump minor. When the old way of detecting scanout is removed, we need to bump major. 
In the meantime, this change should not break existing user-space. Signed-off-by: Thomas Hellstrom Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 6 ++++++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 5 +++-- include/drm/vmwgfx_drm.h | 12 +++++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 135be9688c90..0eaf68273eaf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -39,10 +39,10 @@ #include "ttm/ttm_execbuf_util.h" #include "ttm/ttm_module.h" -#define VMWGFX_DRIVER_DATE "20090724" +#define VMWGFX_DRIVER_DATE "20100118" #define VMWGFX_DRIVER_MAJOR 0 -#define VMWGFX_DRIVER_MINOR 1 -#define VMWGFX_DRIVER_PATCHLEVEL 2 +#define VMWGFX_DRIVER_MINOR 9 +#define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 778851f9f1d6..1c7a316454d8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -48,6 +48,12 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, case DRM_VMW_PARAM_FIFO_OFFSET: param->value = dev_priv->mmio_start; break; + case DRM_VMW_PARAM_HW_CAPS: + param->value = dev_priv->capabilities; + break; + case DRM_VMW_PARAM_FIFO_CAPS: + param->value = dev_priv->fifo.capabilities; + break; default: DRM_ERROR("Illegal vmwgfx get param request: %d\n", param->param); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index c7efbd47ab84..933e90d82866 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -610,9 +610,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, */ 
srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT; srf->scanout = true; - } else { + } else if (req->scanout) + srf->scanout = true; + else srf->scanout = false; - } if (srf->scanout && srf->num_sizes == 1 && diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h index 2be7e1249b6f..dfaf3c2d2c8e 100644 --- a/include/drm/vmwgfx_drm.h +++ b/include/drm/vmwgfx_drm.h @@ -68,7 +68,8 @@ #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1 #define DRM_VMW_PARAM_3D 2 #define DRM_VMW_PARAM_FIFO_OFFSET 3 - +#define DRM_VMW_PARAM_HW_CAPS 4 +#define DRM_VMW_PARAM_FIFO_CAPS 5 /** * struct drm_vmw_getparam_arg @@ -181,6 +182,8 @@ struct drm_vmw_context_arg { * The size of the array should equal the total number of mipmap levels. * @shareable: Boolean whether other clients (as identified by file descriptors) * may reference this surface. + * @scanout: Boolean whether the surface is intended to be used as a + * scanout. * * Input data to the DRM_VMW_CREATE_SURFACE Ioctl. * Output data from the DRM_VMW_REF_SURFACE Ioctl. @@ -192,7 +195,7 @@ struct drm_vmw_surface_create_req { uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; uint64_t size_addr; int32_t shareable; - uint32_t pad64; + int32_t scanout; }; /** @@ -295,6 +298,9 @@ union drm_vmw_surface_reference_arg { * * @commands: User-space address of a command buffer cast to an uint64_t. * @command-size: Size in bytes of the command buffer. + * @throttle-us: Sleep until software is less than @throttle_us + * microseconds ahead of hardware. The driver may round this value + * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. 
* @@ -304,7 +310,7 @@ union drm_vmw_surface_reference_arg { struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; - uint32_t pad64; + uint32_t throttle_us; uint64_t fence_rep; }; From 3bef35721018d2bac08d0d03979606b65347211e Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 9 Feb 2010 19:41:57 +0000 Subject: [PATCH 420/640] drm/vmwgfx: Report proper framebuffer_{max|min}_{width|height} Signed-off-by: Jakob Bornecrantz Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index eeba6d1d06e4..31f9afed0a63 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -769,10 +769,10 @@ int vmw_kms_init(struct vmw_private *dev_priv) drm_mode_config_init(dev); dev->mode_config.funcs = &vmw_kms_funcs; - dev->mode_config.min_width = 640; - dev->mode_config.min_height = 480; - dev->mode_config.max_width = 2048; - dev->mode_config.max_height = 2048; + dev->mode_config.min_width = 1; + dev->mode_config.min_height = 1; + dev->mode_config.max_width = dev_priv->fb_max_width; + dev->mode_config.max_height = dev_priv->fb_max_height; ret = vmw_kms_init_legacy_display_system(dev_priv); From a87897edbae2d60db7bcb6bb0a75e82013d68305 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 9 Feb 2010 21:29:47 +0000 Subject: [PATCH 421/640] drm/vmwgfx: Drop scanout flag compat and add execbuf ioctl parameter members. Bumps major. Even if this bumps the version to 1 it does not mean the driver is out of staging. From what we know this is the last backwards incompatible change to the driver.
Signed-off-by: Jakob Bornecrantz Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++--- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 17 +---------------- include/drm/vmwgfx_drm.h | 8 ++++++++ 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 0eaf68273eaf..3e4e670d3216 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -39,9 +39,9 @@ #include "ttm/ttm_execbuf_util.h" #include "ttm/ttm_module.h" -#define VMWGFX_DRIVER_DATE "20100118" -#define VMWGFX_DRIVER_MAJOR 0 -#define VMWGFX_DRIVER_MINOR 9 +#define VMWGFX_DRIVER_DATE "20100209" +#define VMWGFX_DRIVER_MAJOR 1 +#define VMWGFX_DRIVER_MINOR 0 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 933e90d82866..f8fbbc67a406 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -35,11 +35,6 @@ #define VMW_RES_SURFACE ttm_driver_type1 #define VMW_RES_STREAM ttm_driver_type2 -/* XXX: This isn't a real hardware flag, but just a hack for kernel to - * know about primary surfaces. Find a better way to accomplish this. 
- */ -#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9) - struct vmw_user_context { struct ttm_base_object base; struct vmw_resource res; @@ -579,6 +574,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, srf->flags = req->flags; srf->format = req->format; + srf->scanout = req->scanout; memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels)); srf->num_sizes = 0; for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) @@ -604,17 +600,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) goto out_err1; - if (srf->flags & SVGA3D_SURFACE_HINT_SCANOUT) { - /* we should not send this flag down to hardware since - * its not a official one - */ - srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT; - srf->scanout = true; - } else if (req->scanout) - srf->scanout = true; - else - srf->scanout = false; - if (srf->scanout && srf->num_sizes == 1 && srf->sizes[0].width == 64 && diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h index dfaf3c2d2c8e..c7645f480d12 100644 --- a/include/drm/vmwgfx_drm.h +++ b/include/drm/vmwgfx_drm.h @@ -303,15 +303,23 @@ union drm_vmw_surface_reference_arg { * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an * uint64_t. + * @version: Allows expanding the execbuf ioctl parameters without breaking + * backwards compatibility, since user-space will always tell the kernel + * which version it uses. + * @flags: Execbuf flags. None currently. * * Argument to the DRM_VMW_EXECBUF Ioctl. */ +#define DRM_VMW_EXECBUF_VERSION 0 + struct drm_vmw_execbuf_arg { uint64_t commands; uint32_t command_size; uint32_t throttle_us; uint64_t fence_rep; + uint32_t version; + uint32_t flags; }; /** From 598856407d4e20ebb4de01a91a93d89325924d43 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 10 Feb 2010 18:04:08 -0800 Subject: [PATCH 422/640] tcp: fix ICMP-RTO war Make sure, that TCP has a nonzero RTT estimation after three-way handshake. 
Currently, a listening TCP has a value of 0 for srtt, rttvar and rto right after the three-way handshake is completed with TCP timestamps disabled. This will lead to corrupt RTO recalculation and retransmission flood when RTO is recalculated on backoff reversion as introduced in "Revert RTO on ICMP destination unreachable" (f1ecd5d9e7366609d640ff4040304ea197fbc618). This behaviour can be provoked by connecting to a server which "responds first" (like SMTP) and rejecting every packet after the handshake with dest-unreachable, which will lead to softirq load on the server (up to 30% per socket in some tests). Thanks to Ilpo Jarvinen for providing debug patches and to Denys Fedoryshchenko for reporting and testing. Changes since v3: Removed bad characters in patchfile. Reported-by: Denys Fedoryshchenko Signed-off-by: Damian Lukowski Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e029632493..3fddc69ccccc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5783,11 +5783,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. - * Fix it at least with timestamps. + * Force it here. */ - if (tp->rx_opt.saw_tstamp && - tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0); + tcp_ack_update_rtt(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; From 85b9e4878f3b16993fba871c0c68d0948ec9c7c6 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 8 Feb 2010 09:57:25 +0000 Subject: [PATCH 423/640] drm/vmwgfx: Fix a circular locking dependency bug. 
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 3 ++- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 17 +++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 13 +++---------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 3e4e670d3216..356dc935ec13 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -113,6 +113,7 @@ struct vmw_fifo_state { unsigned long static_buffer_size; bool using_bounce_buffer; uint32_t capabilities; + struct mutex fifo_mutex; struct rw_semaphore rwsem; }; @@ -213,7 +214,7 @@ struct vmw_private { * Fencing and IRQs. */ - uint32_t fence_seq; + atomic_t fence_seq; wait_queue_head_t fence_queue; wait_queue_head_t fifo_queue; atomic_t fence_queue_waiters; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index 4157547cc6e4..39d43a01d846 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -74,6 +74,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) fifo->reserved_size = 0; fifo->using_bounce_buffer = false; + mutex_init(&fifo->fifo_mutex); init_rwsem(&fifo->rwsem); /* @@ -117,7 +118,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) (unsigned int) min, (unsigned int) fifo->capabilities); - dev_priv->fence_seq = dev_priv->last_read_sequence; + atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence); iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE); return vmw_fifo_send_fence(dev_priv, &dummy); @@ -283,7 +284,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) uint32_t reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE; int ret; - down_write(&fifo_state->rwsem); + mutex_lock(&fifo_state->fifo_mutex); max = ioread32(fifo_mem + SVGA_FIFO_MAX); min = ioread32(fifo_mem + 
SVGA_FIFO_MIN); next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD); @@ -351,7 +352,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) } out_err: fifo_state->reserved_size = 0; - up_write(&fifo_state->rwsem); + mutex_unlock(&fifo_state->fifo_mutex); return NULL; } @@ -426,6 +427,7 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) } + down_write(&fifo_state->rwsem); if (fifo_state->using_bounce_buffer || reserveable) { next_cmd += bytes; if (next_cmd >= max) @@ -437,8 +439,9 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) if (reserveable) iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED); mb(); - vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); up_write(&fifo_state->rwsem); + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); + mutex_unlock(&fifo_state->fifo_mutex); } int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence) @@ -451,9 +454,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence) fm = vmw_fifo_reserve(dev_priv, bytes); if (unlikely(fm == NULL)) { - down_write(&fifo_state->rwsem); - *sequence = dev_priv->fence_seq; - up_write(&fifo_state->rwsem); + *sequence = atomic_read(&dev_priv->fence_seq); ret = -ENOMEM; (void)vmw_fallback_wait(dev_priv, false, true, *sequence, false, 3*HZ); @@ -461,7 +462,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence) } do { - *sequence = dev_priv->fence_seq++; + *sequence = atomic_add_return(1, &dev_priv->fence_seq); } while (*sequence == 0); if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index d40086fc8647..4d7cb5393860 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -84,20 +84,13 @@ bool vmw_fence_signaled(struct vmw_private *dev_priv, vmw_fifo_idle(dev_priv, sequence)) return true; - /** - * Below is to signal stale fences that have wrapped. 
- * First, block fence submission. - */ - - down_read(&fifo_state->rwsem); - /** * Then check if the sequence is higher than what we've actually * emitted. Then the fence is stale and signaled. */ - ret = ((dev_priv->fence_seq - sequence) > VMW_FENCE_WRAP); - up_read(&fifo_state->rwsem); + ret = ((atomic_read(&dev_priv->fence_seq) - sequence) + > VMW_FENCE_WRAP); return ret; } @@ -127,7 +120,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, if (fifo_idle) down_read(&fifo_state->rwsem); - signal_seq = dev_priv->fence_seq; + signal_seq = atomic_read(&dev_priv->fence_seq); ret = 0; for (;;) { From c60a284cc41f9989391706e113d30b4f27dbe3e0 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Thu, 11 Feb 2010 00:10:33 +0200 Subject: [PATCH 424/640] drm/radeon: Skip dma copy test in benchmark if card doesn't have dma engine. radeon_copy_dma is only available for r200 or newer cards. Call to radeon_copy_dma would result to NULL pointer dereference if benchmarking asic without dma engine. Signed-off-by: Pauli Nieminen Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_benchmark.c | 55 ++++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 4ddfd4b5bc51..7932dc4d6b90 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -65,31 +65,42 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, if (r) { goto out_cleanup; } - start_jiffies = jiffies; - for (i = 0; i < n; i++) { - r = radeon_fence_create(rdev, &fence); - if (r) { - goto out_cleanup; + + /* r100 doesn't have dma engine so skip the test */ + if (rdev->asic->copy_dma) { + + start_jiffies = jiffies; + for (i = 0; i < n; i++) { + r = radeon_fence_create(rdev, &fence); + if (r) { + goto out_cleanup; + } + + r = radeon_copy_dma(rdev, saddr, daddr, + size / RADEON_GPU_PAGE_SIZE, fence); + + if (r) { + goto out_cleanup; 
+ } + r = radeon_fence_wait(fence, false); + if (r) { + goto out_cleanup; + } + radeon_fence_unref(&fence); } - r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); - if (r) { - goto out_cleanup; + end_jiffies = jiffies; + time = end_jiffies - start_jiffies; + time = jiffies_to_msecs(time); + if (time > 0) { + i = ((n * size) >> 10) / time; + printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" + " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", + n, size >> 10, + sdomain, ddomain, time, + i, i * 1000, (i * 1000) / 1024); } - r = radeon_fence_wait(fence, false); - if (r) { - goto out_cleanup; - } - radeon_fence_unref(&fence); - } - end_jiffies = jiffies; - time = end_jiffies - start_jiffies; - time = jiffies_to_msecs(time); - if (time > 0) { - i = ((n * size) >> 10) / time; - printk(KERN_INFO "radeon: dma %u bo moves of %ukb from %d to %d" - " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, - sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); } + start_jiffies = jiffies; for (i = 0; i < n; i++) { r = radeon_fence_create(rdev, &fence); From 648ac05c4f8a8aea908c7dff81ceffe003e28561 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 10 Feb 2010 16:52:45 +1000 Subject: [PATCH 425/640] drm/radeon/kms: retry auxch on 0x20 timeout value. ATOM appears to return 0x20 which seems to mean some sort of timeout. retry the transaction up to 10 times before failing, this makes DP->VGA convertor we bought work at least a bit more predictably. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_dp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 71060114d5de..b32eeea5bb8b 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -332,11 +332,13 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes, PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION args; int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction); unsigned char *base; + int retry_count = 0; memset(&args, 0, sizeof(args)); base = (unsigned char *)rdev->mode_info.atom_context->scratch; +retry: memcpy(base, req_bytes, num_bytes); args.lpAuxRequest = 0; @@ -347,10 +349,12 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes, atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); - if (args.ucReplyStatus) { - DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x\n", + if (args.ucReplyStatus && !args.ucDataOutLen) { + if (args.ucReplyStatus == 0x20 && retry_count < 10) + goto retry; + DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n", req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3], - chan->rec.i2c_id, args.ucReplyStatus); + chan->rec.i2c_id, args.ucReplyStatus, retry_count); return false; } From 77c1ff3982c6b36961725dd19e872a1c07df7f3b Mon Sep 17 00:00:00 2001 From: Andy Getzendanner Date: Thu, 11 Feb 2010 14:04:48 +1000 Subject: [PATCH 426/640] vgaarb: fix incorrect dereference of userspace pointer. This patch corrects a userspace pointer dereference in the VGA arbiter in 2.6.32.1. copy_from_user() is used at line 822 to copy the contents of buf into kbuf, but a call to strncmp() on line 964 uses buf rather than kbuf. This problem led to a GPF in strncmp() when X was started on my x86_32 systems. 
X triggered the behavior with a write of "target PCI:0000:01:00.0" to /dev/vga_arbiter. The patch has been tested against 2.6.32.1 and observed to correct the GPF observed when starting X or manually writing the string "target PCI:0000:01:00.0" to /dev/vga_arbiter. Signed-off-by: Andy Getzendanner Cc: Jesse Barnes Cc: Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- drivers/gpu/vga/vgaarb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 1ac0c93603c9..24b56dc54597 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -961,7 +961,7 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf, remaining -= 7; pr_devel("client 0x%p called 'target'\n", priv); /* if target is default */ - if (!strncmp(buf, "default", 7)) + if (!strncmp(kbuf, "default", 7)) pdev = pci_dev_get(vga_default_device()); else { if (!vga_pci_str_to_vars(curr_pos, remaining, From cab4d27764d5a8654212b3e96eb0ae793aec5b94 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 11 Feb 2010 17:15:57 +0100 Subject: [PATCH 427/640] amd64_edac: Do not falsely trigger kerneloops An unfortunate "WARNING" in the message amd64_edac dumps when the system doesn't support DRAM ECC or ECC checking is not enabled in the BIOS used to trigger kerneloops which qualified the message as an OOPS thus misleading the users. See, e.g. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/422536 http://bugzilla.kernel.org/show_bug.cgi?id=15238 Downgrade the message level to KERN_NOTICE and fix the formulation. 
Cc: stable@kernel.org # .32.x Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 000dc67b85b7..3391e6739d06 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2658,10 +2658,11 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) * the memory system completely. A command line option allows to force-enable * hardware ECC later in amd64_enable_ecc_error_reporting(). */ -static const char *ecc_warning = - "WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n" - " Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n" - " Also, use of the override can cause unknown side effects.\n"; +static const char *ecc_msg = + "ECC disabled in the BIOS or no ECC capability, module will not load.\n" + " Either enable ECC checking or force module loading by setting " + "'ecc_enable_override'.\n" + " (Note that use of the override may cause unknown side effects.)\n"; static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) { @@ -2673,7 +2674,7 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); if (!ecc_enabled) - amd64_printk(KERN_WARNING, "This node reports that Memory ECC " + amd64_printk(KERN_NOTICE, "This node reports that Memory ECC " "is currently disabled, set F3x%x[22] (%s).\n", K8_NBCFG, pci_name(pvt->misc_f3_ctl)); else @@ -2681,13 +2682,13 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); if (!nb_mce_en) - amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR " + amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR " "0x%08x[4] on node %d to enable.\n", MSR_IA32_MCG_CTL, pvt->mc_node_id); if (!ecc_enabled || !nb_mce_en) { if (!ecc_enable_override) { - amd64_printk(KERN_WARNING, "%s", ecc_warning); + 
amd64_printk(KERN_NOTICE, "%s", ecc_msg); return -ENODEV; } ecc_enable_override = 0; From 440ab7ac2d6b735fb278a1ff1674f6716314c6bb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 11 Feb 2010 12:29:16 -0800 Subject: [PATCH 428/640] sparc32: Fix thinko in previous change. Should mask stack with 0xf not "0x15". Noticed by Blue Swirl Signed-off-by: David S. Miller --- arch/sparc/kernel/process_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index f23c8fda6503..c49865b30719 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -526,7 +526,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, * Set some valid stack frames to give to the child. */ childstack = (struct sparc_stackf __user *) - (sp & ~0x15UL); + (sp & ~0xfUL); parentstack = (struct sparc_stackf __user *) regs->u_regs[UREG_FP]; From 93716b9470fbfd9efdc7d0f2445cb34635de3f6d Mon Sep 17 00:00:00 2001 From: Marcel Selhorst Date: Wed, 10 Feb 2010 13:56:32 -0800 Subject: [PATCH 429/640] tpm_infineon: fix suspend/resume handler for pnp_driver When suspending, tpm_infineon calls the generic suspend function of the TPM framework. However, the TPM framework does not return and the system hangs upon suspend. When sending the necessary command "TPM_SaveState" directly within the driver, suspending and resuming works fine. 
Signed-off-by: Marcel Selhorst Cc: OGAWA Hirofumi Cc: Debora Velarde Cc: Rajiv Andrade Cc: [2.6.32.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm_infineon.c | 79 ++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index ecba4942fc8e..f58440791e65 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -39,12 +39,12 @@ struct tpm_inf_dev { int iotype; - void __iomem *mem_base; /* MMIO ioremap'd addr */ - unsigned long map_base; /* phys MMIO base */ - unsigned long map_size; /* MMIO region size */ - unsigned int index_off; /* index register offset */ + void __iomem *mem_base; /* MMIO ioremap'd addr */ + unsigned long map_base; /* phys MMIO base */ + unsigned long map_size; /* MMIO region size */ + unsigned int index_off; /* index register offset */ - unsigned int data_regs; /* Data registers */ + unsigned int data_regs; /* Data registers */ unsigned int data_size; unsigned int config_port; /* IO Port config index reg */ @@ -406,14 +406,14 @@ static const struct tpm_vendor_specific tpm_inf = { .miscdev = {.fops = &inf_ops,}, }; -static const struct pnp_device_id tpm_pnp_tbl[] = { +static const struct pnp_device_id tpm_inf_pnp_tbl[] = { /* Infineon TPMs */ {"IFX0101", 0}, {"IFX0102", 0}, {"", 0} }; -MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); +MODULE_DEVICE_TABLE(pnp, tpm_inf_pnp_tbl); static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) @@ -430,7 +430,7 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) { - tpm_dev.iotype = TPM_INF_IO_PORT; + tpm_dev.iotype = TPM_INF_IO_PORT; tpm_dev.config_port = pnp_port_start(dev, 0); tpm_dev.config_size = pnp_port_len(dev, 0); @@ -459,9 +459,9 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev 
*dev, goto err_last; } } else if (pnp_mem_valid(dev, 0) && - !(pnp_mem_flags(dev, 0) & IORESOURCE_DISABLED)) { + !(pnp_mem_flags(dev, 0) & IORESOURCE_DISABLED)) { - tpm_dev.iotype = TPM_INF_IO_MEM; + tpm_dev.iotype = TPM_INF_IO_MEM; tpm_dev.map_base = pnp_mem_start(dev, 0); tpm_dev.map_size = pnp_mem_len(dev, 0); @@ -563,11 +563,11 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, "product id 0x%02x%02x" "%s\n", tpm_dev.iotype == TPM_INF_IO_PORT ? - tpm_dev.config_port : - tpm_dev.map_base + tpm_dev.index_off, + tpm_dev.config_port : + tpm_dev.map_base + tpm_dev.index_off, tpm_dev.iotype == TPM_INF_IO_PORT ? - tpm_dev.data_regs : - tpm_dev.map_base + tpm_dev.data_regs, + tpm_dev.data_regs : + tpm_dev.map_base + tpm_dev.data_regs, version[0], version[1], vendorid[0], vendorid[1], productid[0], productid[1], chipname); @@ -607,20 +607,55 @@ static __devexit void tpm_inf_pnp_remove(struct pnp_dev *dev) iounmap(tpm_dev.mem_base); release_mem_region(tpm_dev.map_base, tpm_dev.map_size); } + tpm_dev_vendor_release(chip); tpm_remove_hardware(chip->dev); } } +static int tpm_inf_pnp_suspend(struct pnp_dev *dev, pm_message_t pm_state) +{ + struct tpm_chip *chip = pnp_get_drvdata(dev); + int rc; + if (chip) { + u8 savestate[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* blob length (in bytes) */ + 0, 0, 0, 152 /* TPM_ORD_SaveState */ + }; + dev_info(&dev->dev, "saving TPM state\n"); + rc = tpm_inf_send(chip, savestate, sizeof(savestate)); + if (rc < 0) { + dev_err(&dev->dev, "error while saving TPM state\n"); + return rc; + } + } + return 0; +} + +static int tpm_inf_pnp_resume(struct pnp_dev *dev) +{ + /* Re-configure TPM after suspending */ + tpm_config_out(ENABLE_REGISTER_PAIR, TPM_INF_ADDR); + tpm_config_out(IOLIMH, TPM_INF_ADDR); + tpm_config_out((tpm_dev.data_regs >> 8) & 0xff, TPM_INF_DATA); + tpm_config_out(IOLIML, TPM_INF_ADDR); + tpm_config_out((tpm_dev.data_regs & 0xff), TPM_INF_DATA); + /* activate register */ + tpm_config_out(TPM_DAR, 
TPM_INF_ADDR); + tpm_config_out(0x01, TPM_INF_DATA); + tpm_config_out(DISABLE_REGISTER_PAIR, TPM_INF_ADDR); + /* disable RESET, LP and IRQC */ + tpm_data_out(RESET_LP_IRQC_DISABLE, CMD); + return tpm_pm_resume(&dev->dev); +} + static struct pnp_driver tpm_inf_pnp_driver = { .name = "tpm_inf_pnp", - .driver = { - .owner = THIS_MODULE, - .suspend = tpm_pm_suspend, - .resume = tpm_pm_resume, - }, - .id_table = tpm_pnp_tbl, + .id_table = tpm_inf_pnp_tbl, .probe = tpm_inf_pnp_probe, - .remove = __devexit_p(tpm_inf_pnp_remove), + .suspend = tpm_inf_pnp_suspend, + .resume = tpm_inf_pnp_resume, + .remove = __devexit_p(tpm_inf_pnp_remove) }; static int __init init_inf(void) @@ -638,5 +673,5 @@ module_exit(cleanup_inf); MODULE_AUTHOR("Marcel Selhorst "); MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2"); -MODULE_VERSION("1.9"); +MODULE_VERSION("1.9.2"); MODULE_LICENSE("GPL"); From c286d03cce118e9fb8dda8da43f9131c169c5a75 Mon Sep 17 00:00:00 2001 From: Johan Kristell Date: Wed, 10 Feb 2010 13:56:34 -0800 Subject: [PATCH 430/640] mmc_test: block addressed cards This patch fixes a bug in the multiblock write tests where the written data is read back for verifying one block at a time. The tests in mmc_test assumes that all cards are byte addressable. This will cause the multi block write tests to fail, leading the user of the mmc_test driver thinking there is something wrong with the sdhci driver they are testing. The start address for the block is calculated as: blocknum * 512. For block addressable cards the blocknum alone should be used. 
Signed-off-by: Johan Kristell Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/card/mmc_test.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index b9f1e84897cc..e7f8027165e6 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -74,6 +74,9 @@ static void mmc_test_prepare_mrq(struct mmc_test_card *test, } mrq->cmd->arg = dev_addr; + if (!mmc_card_blockaddr(test->card)) + mrq->cmd->arg <<= 9; + mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC; if (blocks == 1) @@ -190,7 +193,7 @@ static int __mmc_test_prepare(struct mmc_test_card *test, int write) } for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); + ret = mmc_test_buffer_transfer(test, test->buffer, i, 512, 1); if (ret) return ret; } @@ -219,7 +222,7 @@ static int mmc_test_cleanup(struct mmc_test_card *test) memset(test->buffer, 0, 512); for (i = 0;i < BUFFER_SIZE / 512;i++) { - ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1); + ret = mmc_test_buffer_transfer(test, test->buffer, i, 512, 1); if (ret) return ret; } @@ -426,7 +429,7 @@ static int mmc_test_transfer(struct mmc_test_card *test, for (i = 0;i < sectors;i++) { ret = mmc_test_buffer_transfer(test, test->buffer + i * 512, - dev_addr + i * 512, 512, 0); + dev_addr + i, 512, 0); if (ret) return ret; } From cff9279e4e8d6ff80a640dd6977c8f76aa01e1f8 Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Wed, 10 Feb 2010 13:56:36 -0800 Subject: [PATCH 431/640] edac: mpc85xx fix bad page calculation Commit b4846251727a38a7f248e41308c060995371dd05 ("edac: mpc85xx add mpc83xx support") accidentally broke how a chip select's first and last page addresses are calculated. The page addresses are being shifted too far right by PAGE_SHIFT. 
This results in errors such as: EDAC MPC85xx MC1: Err addr: 0x003075c0 EDAC MPC85xx MC1: PFN: 0x00000307 EDAC MPC85xx MC1: PFN out of range! EDAC MC1: INTERNAL ERROR: row out of range (4 >= 4) EDAC MC1: CE - no information available: INTERNAL ERROR The value of PAGE_SHIFT is already being taken into consideration during the calculation of the 'start' and 'end' variables, thus it is not necessary to account for it again when setting a chip select's first and last page address. Signed-off-by: Peter Tyser Signed-off-by: Doug Thompson Cc: Ira W. Snyder Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/mpc85xx_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index cf27402af97b..e24a87fe3b9b 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -804,8 +804,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) end <<= (24 - PAGE_SHIFT); end |= (1 << (24 - PAGE_SHIFT)) - 1; - csrow->first_page = start >> PAGE_SHIFT; - csrow->last_page = end >> PAGE_SHIFT; + csrow->first_page = start; + csrow->last_page = end; csrow->nr_pages = end + 1 - start; csrow->grain = 8; csrow->mtype = mtype; From f8c63345b498a8590e8e87a5990a36cdf89636df Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Wed, 10 Feb 2010 13:56:37 -0800 Subject: [PATCH 432/640] edac: mpc85xx fix build regression by removing unused debug code Some unused, unsupported debug code existed in the mpc85xx EDAC driver that resulted in a build failure when CONFIG_EDAC_DEBUG was defined: drivers/edac/mpc85xx_edac.c: In function 'mpc85xx_mc_err_probe': drivers/edac/mpc85xx_edac.c:1031: error: implicit declaration of function 'edac_mc_register_mcidev_debug' drivers/edac/mpc85xx_edac.c:1031: error: 'debug_attr' undeclared (first use in this function) drivers/edac/mpc85xx_edac.c:1031: error: (Each undeclared identifier is reported only once
drivers/edac/mpc85xx_edac.c:1031: error: for each function it appears in.) Signed-off-by: Peter Tyser Signed-off-by: Doug Thompson Signed-off-by: Linus Torvalds --- drivers/edac/mpc85xx_edac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index e24a87fe3b9b..ecd5928d7110 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -892,10 +892,6 @@ static int __devinit mpc85xx_mc_err_probe(struct of_device *op, mpc85xx_init_csrows(mci); -#ifdef CONFIG_EDAC_DEBUG - edac_mc_register_mcidev_debug((struct attribute **)debug_attr); -#endif - /* store the original error disable bits */ orig_ddr_err_disable = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE); From 763458e0dbfb4d562d62823149cf62e8b8eca82b Mon Sep 17 00:00:00 2001 From: Rishikesh Date: Wed, 10 Feb 2010 13:56:40 -0800 Subject: [PATCH 433/640] MAINTAINERS: changed LTP maintainership responsibilities Change the LTP maintainer responsibilities from 2010. Ref: http://marc.info/?l=ltp-list&m=126502242912536&w=2 Signed-off-by : Rishikesh K Rajak Cc: Subrata Modak Cc: Mike Frysinger Cc: Garrett Cooper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 602022d2c7a5..412eff60c33d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3411,8 +3411,10 @@ S: Maintained F: drivers/scsi/sym53c8xx_2/ LTP (Linux Test Project) -M: Subrata Modak -M: Mike Frysinger +M: Rishikesh K Rajak +M: Garrett Cooper +M: Mike Frysinger +M: Subrata Modak L: ltp-list@lists.sourceforge.net (subscribers-only) W: http://ltp.sourceforge.net/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/galak/ltp.git From 4cfbafd33f5ae99688ab82525a1d449c1c1b198f Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 10 Feb 2010 13:56:40 -0800 Subject: [PATCH 434/640] compat_ioctl: add compat handler for TIOCGSID ioctl This is used by tcgetsid(3).
Signed-off-by: Andreas Schwab Cc: Alan Cox Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index b6f23b25370e..30698a13fb22 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -936,6 +936,7 @@ COMPATIBLE_IOCTL(TCSETSF) COMPATIBLE_IOCTL(TIOCLINUX) COMPATIBLE_IOCTL(TIOCSBRK) COMPATIBLE_IOCTL(TIOCCBRK) +COMPATIBLE_IOCTL(TIOCGSID) COMPATIBLE_IOCTL(TIOCGICOUNT) /* Little t */ COMPATIBLE_IOCTL(TIOCGETD) From 803bf5ec259941936262d10ecc84511b76a20921 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 10 Feb 2010 13:56:42 -0800 Subject: [PATCH 435/640] fs/exec.c: restrict initial stack space expansion to rlimit When reserving stack space for a new process, make sure we're not attempting to expand the stack by more than rlimit allows. This fixes a bug caused by b6a2fea39318e43fee84fa7b0b90d68bed92d2ba ("mm: variable length argument support") and unmasked by fc63cf237078c86214abcb2ee9926d8ad289da9b ("exec: setup_arg_pages() fails to return errors"). This bug means that when limiting the stack to less than 20*PAGE_SIZE (eg. 80K on 4K pages or 'ulimit -s 79') all processes will be killed before they start. This is particularly bad with 64K pages, where a ulimit below 1280K will kill every process. To test, do: 'ulimit -s 15; ls' before and after the patch is applied. Before it's applied, 'ls' should be killed. After the patch is applied, 'ls' should no longer be killed. A stack limit of 15KB is used since it's small enough to trigger the 20*PAGE_SIZE case. Also, 15KB is not a multiple of PAGE_SIZE, which is a trickier case to handle correctly with this code. 4K pages should be fine to test with.
[kosaki.motohiro@jp.fujitsu.com: cleanup] [akpm@linux-foundation.org: cleanup cleanup] Signed-off-by: Michael Neuling Signed-off-by: KOSAKI Motohiro Cc: Americo Wang Cc: Anton Blanchard Cc: Oleg Nesterov Cc: James Morris Cc: Ingo Molnar Cc: Serge Hallyn Cc: Benjamin Herrenschmidt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 0790a107ff7e..e95c692ef0e4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm, struct vm_area_struct *prev = NULL; unsigned long vm_flags; unsigned long stack_base; + unsigned long stack_size; + unsigned long stack_expand; + unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size to 1GB */ @@ -627,10 +630,24 @@ int setup_arg_pages(struct linux_binprm *bprm, goto out_unlock; } + stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE; + stack_size = vma->vm_end - vma->vm_start; + /* + * Align this down to a page boundary as expand_stack + * will align it up. + */ + rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; + rlim_stack = min(rlim_stack, stack_size); #ifdef CONFIG_STACK_GROWSUP - stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_start + rlim_stack; + else + stack_base = vma->vm_end + stack_expand; #else - stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE; + if (stack_size + stack_expand > rlim_stack) + stack_base = vma->vm_end - rlim_stack; + else + stack_base = vma->vm_start - stack_expand; #endif ret = expand_stack(vma, stack_base); if (ret) From 0e5a9fb0426108d750c97c25b1ab04d3768b5aff Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Fri, 5 Feb 2010 18:25:41 -0500 Subject: [PATCH 436/640] GFS2: Fix error code We need this one-liner to signal the mount helper of the 'insufficient journals' condition. 
Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8a102f731003..a86ed6381566 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -725,7 +725,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) goto fail; } - error = -EINVAL; + error = -EUSERS; if (!gfs2_jindex_size(sdp)) { fs_err(sdp, "no journals!\n"); goto fail_jindex; From 07ccb7bf2c928fef4fea2cda69ba2e23479578db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 12 Feb 2010 10:10:55 +0000 Subject: [PATCH 437/640] GFS2: Fix bmap allocation corner-case bug This patch solves a corner case during allocation which occurs if both metadata (indirect) and data blocks are required but there is an obstacle in the filesystem (e.g. a resource group header or another allocated block) such that when the allocation is requested only enough blocks for the metadata are returned. By changing the exit condition of this loop, we ensure that a minimum of one data block will always be returned. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6d47379e794b..583e823307ae 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -541,7 +541,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, *ptr++ = cpu_to_be64(bn++); break; } - } while (state != ALLOC_DATA); + } while ((state != ALLOC_DATA) || !dblock); ip->i_height = height; gfs2_add_inode_blocks(&ip->i_inode, alloced); From 973e9a2795b3b41d8408a0bb6f87b783c5efc88a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Feb 2010 19:20:48 +0000 Subject: [PATCH 438/640] regulator: Fix display of null constraints for regulators If the regulator constraints are empty and there is no voltage reported then nothing will be added to the text displayed for the constraints, leading to random stack data being printed. This is unlikely to happen for practical regulators since most will at least report a voltage but should still be fixed. 
Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 686ef270ecf7..b60a4c9f8f16 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -661,7 +661,7 @@ static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state) static void print_constraints(struct regulator_dev *rdev) { struct regulation_constraints *constraints = rdev->constraints; - char buf[80]; + char buf[80] = ""; int count = 0; int ret; From 62737d445b149eaf0beac50de8d856b5e94150be Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 12 Feb 2010 12:30:21 +0100 Subject: [PATCH 439/640] regulator/lp3971: vol_map out of bounds in lp3971_{ldo,dcdc}_set_voltage() After `for (val = LDO_VOL_MIN_IDX; val <= LDO_VOL_MAX_IDX; val++)', if no break occurs, val reaches LDO_VOL_MAX_IDX + 1, which is out of bounds for ldo45_voltage_map[] and ldo123_voltage_map[]. Similarly BUCK_TARGET_VOL_MAX_IDX + 1 is out of bounds for buck_voltage_map[].
Signed-off-by: Roel Kluin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/lp3971.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c index 76d08c282f9c..4f33a0f4a179 100644 --- a/drivers/regulator/lp3971.c +++ b/drivers/regulator/lp3971.c @@ -183,7 +183,7 @@ static int lp3971_ldo_set_voltage(struct regulator_dev *dev, if (vol_map[val] >= min_vol) break; - if (vol_map[val] > max_vol) + if (val > LDO_VOL_MAX_IDX || vol_map[val] > max_vol) return -EINVAL; return lp3971_set_bits(lp3971, LP3971_LDO_VOL_CONTR_REG(ldo), @@ -272,7 +272,7 @@ static int lp3971_dcdc_set_voltage(struct regulator_dev *dev, if (vol_map[val] >= min_vol) break; - if (vol_map[val] > max_vol) + if (val > BUCK_TARGET_VOL_MAX_IDX || vol_map[val] > max_vol) return -EINVAL; ret = lp3971_set_bits(lp3971, LP3971_BUCK_TARGET_VOL1_REG(buck), From 22208ac586f2e456c49e927b90ded50e923b6aee Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 12 Feb 2010 08:17:58 -0800 Subject: [PATCH 440/640] [IA64] preserve personality flag bits across exec In its <asm/elf.h> ia64 defines SET_PERSONALITY in a way that unconditionally sets the personality of the current process to PER_LINUX, losing any flag bits from the upper 3 bytes of current->personality. This is wrong. Those bits are intended to be inherited across exec (other code takes care of ensuring that security sensitive bits like ADDR_NO_RANDOMIZE are not passed to unsuspecting setuid/setgid applications). Signed-off-by: Tony Luck --- arch/ia64/include/asm/elf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index e14108b19c09..4c41656ede87 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -201,7 +201,9 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with...
*/ #define ELF_PLATFORM NULL -#define SET_PERSONALITY(ex) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) \ + set_personality((current->personality & ~PER_MASK) | PER_LINUX) + #define elf_read_implies_exec(ex, executable_stack) \ ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) From 22a8cdd60339d931d0dca54427712b2714e5ba8b Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Fri, 12 Feb 2010 10:53:08 -0500 Subject: [PATCH 441/640] parisc: fix tracing of signals Mike Frysinger pointed out that calling tracehook_signal_handler with stepping=0 missed testing the thread flags, resulting in not calling ptrace_notify. Fix this by testing if we're single stepping or branch stepping and setting the flag accordingly. Tested, seems to work. Reported-by: Mike Frysinger Signed-off-by: Kyle McMartin Signed-off-by: Linus Torvalds --- arch/parisc/kernel/signal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index fb37ac52e46c..35c827e94e31 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -468,7 +468,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - tracehook_signal_handler(sig, info, ka, regs, 0); + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP) || + test_thread_flag(TIF_BLOCKSTEP)); return 1; } From d6d8bf549393484e906913f02fa3c9518a2819b6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 12 Feb 2010 18:17:06 +0100 Subject: [PATCH 442/640] ALSA: hda - use WARN_ON_ONCE() for zero-division detection Replace the zero-division warning message with WARN_ON_ONCE() per the advice by Linus. This shouldn't happen, but if it happens, it's possible that the bug happens often due to buggy IRQs.
Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e767c3f395ab..3600e9cc9bc6 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1893,12 +1893,9 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) if (!bdl_pos_adj[chip->dev_index]) return 1; /* no delayed ack */ - if (azx_dev->period_bytes == 0) { - printk(KERN_WARNING - "hda-intel: Divide by zero was avoided " - "in azx_dev->period_bytes.\n"); - return 0; - } + if (WARN_ONCE(!azx_dev->period_bytes, + "hda-intel: zero azx_dev->period_bytes")) + return 0; /* this shouldn't happen! */ if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2) return 0; /* NG - it's below the period boundary */ return 1; /* OK, it's fine */ From 724e6d3fe8003c3f60bf404bf22e4e331327c596 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 12 Feb 2010 11:07:45 -0800 Subject: [PATCH 443/640] Linux 2.6.33-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f8e02e9491d0..12b1aa1103ee 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 33 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* From fafaf31bf9f965d91462ee115e27ef6c262b74ea Mon Sep 17 00:00:00 2001 From: Shanyu Zhao Date: Thu, 11 Feb 2010 10:42:22 -0800 Subject: [PATCH 444/640] iwlwifi: fix AMSDU Rx after paged Rx patch Previous patch "use paged Rx" broke AMSDU Rx functionality. If an AP sends out A-MSDU packets the station will crash. Fix it by linearizing skbuff for AMSDU packet before handing it to mac80211 since mac80211 doesn't support paged skbuff. This fixes http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2155 Reported-by: Norbert Preining Signed-off-by: Shanyu Zhao Acked-by: Zhu Yi Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index 6f36b6e79f5e..2dbce85404aa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -928,7 +928,10 @@ static void iwl_pass_packet_to_mac80211(struct iwl_priv *priv, if (ieee80211_is_mgmt(fc) || ieee80211_has_protected(fc) || ieee80211_has_morefrags(fc) || - le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG) + le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG || + (ieee80211_is_data_qos(fc) && + *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)) ret = skb_linearize(skb); else ret = __pskb_pull_tail(skb, min_t(u16, IWL_LINK_HDR_MAX, len)) ? From c6b471e6454c0e1c6d756672841cbaeae7c949f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 7 Feb 2010 17:26:30 +0000 Subject: [PATCH 445/640] inet: Remove bogus IGMPv3 report handling Currently we treat IGMPv3 reports as if it were an IGMPv2/v1 report. This is broken as IGMPv3 reports are formatted differently. So we end up suppressing a bogus multicast group (which should be harmless as long as the leading reserved field is zero). In fact, IGMPv3 does not allow membership report suppression so we should simply ignore IGMPv3 membership reports as a host. This patch does exactly that. I kept the case statement for it so people won't accidentally add it back thinking that we overlooked this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 76c08402c933..a42f658e756a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -946,7 +946,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - case IGMPV3_HOST_MEMBERSHIP_REPORT: /* Is it our report looped back? 
*/ if (skb_rtable(skb)->fl.iif == 0) break; @@ -960,6 +959,7 @@ int igmp_rcv(struct sk_buff *skb) in_dev_put(in_dev); return pim_rcv_v1(skb); #endif + case IGMPV3_HOST_MEMBERSHIP_REPORT: case IGMP_DVMRP: case IGMP_TRACE: case IGMP_HOST_LEAVE_MESSAGE: From 5affcd6ba2036b59a4dee3f0576ae3584e92e4f1 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Fri, 12 Feb 2010 10:05:45 +0200 Subject: [PATCH 446/640] mac80211: fix handling of null-rate control in rate_control_get_rate For hardware with IEEE80211_HW_HAS_RATE_CONTROL the rate controller is not initialized. However, calling functions such as ieee80211_beacon_get result in the rate_control_get_rate function getting called, which is accessing (in this case uninitialized) rate control structures unconditionally. Fix by exiting the function before setting the rates for HW with IEEE80211_HW_HAS_RATE_CONTROL set. The initialization of the ieee80211_tx_info struct is intentionally still executed. Signed-off-by: Juuso Oikarinen Reviewed-by: Kalle Valo Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..12a2bff7dcdb 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -245,6 +245,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } + if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return; + if (sta && sdata->force_unicast_rateidx > -1) { info->control.rates[0].idx = sdata->force_unicast_rateidx; } else { From 232486e1e9f34889424b68ee6270440b554479a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Feb 2010 12:03:45 -0800 Subject: [PATCH 447/640] sparc64: Tighten checks in kstack_valid(). The kernel stack pointer is invalid if it is not 16-byte aligned. Based upon a report by Meelis Roos Signed-off-by: David S. 
Miller --- arch/sparc/kernel/kstack.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h index 4248d969272f..5247283d1c03 100644 --- a/arch/sparc/kernel/kstack.h +++ b/arch/sparc/kernel/kstack.h @@ -11,6 +11,10 @@ static inline bool kstack_valid(struct thread_info *tp, unsigned long sp) { unsigned long base = (unsigned long) tp; + /* Stack pointer must be 16-byte aligned. */ + if (sp & (16UL - 1)) + return false; + if (sp >= (base + sizeof(struct thread_info)) && sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf))) return true; From 5e2a911cecc7e0fd89b1d2d001b7b89d47057ad6 Mon Sep 17 00:00:00 2001 From: Steve Hodgson Date: Fri, 12 Feb 2010 12:32:27 -0800 Subject: [PATCH 448/640] sfc: Fix SFE4002 initialisation From: Steve Hodgson Commit 357d46a17e54c9a87e0e6ef3930ff4ab2d232b81 "sfc: QT202x: Remove unreliable MMD check at initialisation" broke initialisation of the SFE4002. efx_mdio_reset_mmd() returns a positive value rather than 0 on success. The above commit causes this value to be propagated up by qt202x_reset_phy(), which is treated as a failure by its callers. Change qt202x_reset_phy() to return 0 if successful. The PCI layer treats >0 as "fail, but please call remove() anyway", which means that unloading the driver would cause a crash. Add a WARN_ON() on the failure path of efx_pci_probe() to provide early warning if there are any other cases where we do this. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/efx.c | 1 + drivers/net/sfc/qt202x_phy.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 103e8b0e2a0d..46997e177ee3 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -2284,6 +2284,7 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, fail2: efx_fini_struct(efx); fail1: + WARN_ON(rc > 0); EFX_LOG(efx, "initialisation failed. 
rc=%d\n", rc); free_netdev(net_dev); return rc; diff --git a/drivers/net/sfc/qt202x_phy.c b/drivers/net/sfc/qt202x_phy.c index e0d13a451019..67eec7a6e487 100644 --- a/drivers/net/sfc/qt202x_phy.c +++ b/drivers/net/sfc/qt202x_phy.c @@ -320,7 +320,7 @@ static int qt202x_reset_phy(struct efx_nic *efx) falcon_board(efx)->type->init_phy(efx); - return rc; + return 0; fail: EFX_ERR(efx, "PHY reset timed out\n"); From 3f6fae9559225741c91f1320090b285da1413290 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Feb 2010 07:43:00 +0000 Subject: [PATCH 449/640] Btrfs: btrfs_mark_extent_written uses the wrong slot My test does: fallocate a big file and do write. The file is 512M, but after file write is done btrfs-debug-tree shows: item 6 key (257 EXTENT_DATA 0) itemoff 3516 itemsize 53 extent data disk byte 1103101952 nr 536870912 extent data offset 0 nr 399634432 ram 536870912 extent compression 0 Looks like a regression introduced by 6c7d54ac87f338c479d9729e8392eca3f76e11e1, where we set wrong slot.
Signed-off-by: Shaohua Li Acked-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 413a30dafcda..a7fd9f3a750a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -720,13 +720,15 @@ again: inode->i_ino, orig_offset); BUG_ON(ret); } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); if (del_nr == 0) { + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); } else { + fi = btrfs_item_ptr(leaf, del_slot - 1, + struct btrfs_file_extent_item); btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_num_bytes(leaf, fi, From fa644298eb24ab05b32acf6cc0f2265b833280e1 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 15 Dec 2009 12:58:09 +0000 Subject: [PATCH 450/640] powerpc/85xx: Fix oops during MSI driver probe on MPC85xxMDS boards MPC85xx chips report the wrong value in feature reporting register, and that causes the following oops: Unable to handle kernel paging request for data at address 0x00000c00 Faulting instruction address: 0xc0019294 Oops: Kernel access of bad area, sig: 11 [#1] MPC8569 MDS Modules linked in: [...] NIP [c0019294] mpic_set_irq_type+0x2f0/0x368 LR [c0019124] mpic_set_irq_type+0x180/0x368 Call Trace: [ef851d60] [c0019124] mpic_set_irq_type+0x180/0x368 (unreliable) [ef851d90] [c007958c] __irq_set_trigger+0x44/0xd4 [ef851db0] [c007b550] set_irq_type+0x40/0x7c [ef851dc0] [c0004a60] irq_create_of_mapping+0xb4/0x114 [ef851df0] [c0004af0] irq_of_parse_and_map+0x30/0x40 [ef851e20] [c0405678] fsl_of_msi_probe+0x1a0/0x328 [ef851e60] [c02e6438] of_platform_device_probe+0x5c/0x84 [...] This is because mpic_alloc() assigns wrong values to mpic->isu_{size,shift,mask}, and things eventually break when _mpic_irq_read() is trying to use them. 
This patch fixes the issue by enabling MPIC_BROKEN_FRR_NIRQS quirk. Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 21f61b8c445b..cc29c0f5300d 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -338,7 +338,8 @@ static void __init mpc85xx_mds_pic_init(void) } mpic = mpic_alloc(np, r.start, - MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, + MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN | + MPIC_BROKEN_FRR_NIRQS, 0, 256, " OpenPIC "); BUG_ON(mpic == NULL); of_node_put(np); From d1d47ec6e62ab08d2ebb925fd9203abfad3adfbf Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Fri, 18 Dec 2009 16:50:37 -0600 Subject: [PATCH 451/640] powerpc/85xx: Fix SMP when "cpu-release-addr" is in lowmem Recent U-Boot commit 5ccd29c3679b3669b0bde5c501c1aa0f325a7acb caused the "cpu-release-addr" device tree property to contain the physical RAM location that secondary cores were spinning at. Previously, the "cpu-release-addr" property contained a value referencing the boot page translation address range of 0xfffffxxx, which then indirectly accessed RAM. The "cpu-release-addr" is currently ioremapped and the secondary cores kicked. However, due to the recent change in "cpu-release-addr", it sometimes points to a memory location in low memory that cannot be ioremapped. For example on a P2020-based board with 512MB of RAM the following error occurs on bootup: <...> mpic: requesting IPIs ... __ioremap(): phys addr 0x1ffff000 is RAM lr c05df9a0 Unable to handle kernel paging request for data at address 0x00000014 Faulting instruction address: 0xc05df9b0 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2 P2020 RDB Modules linked in: <... 
eventual kernel panic> Adding logic to conditionally ioremap or access memory directly resolves the issue. Signed-off-by: Peter Tyser Signed-off-by: Nate Case Reported-by: Dipen Dudhat Tested-by: Dipen Dudhat Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/smp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 04160a4cc699..a15f582300d8 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -46,6 +46,7 @@ smp_85xx_kick_cpu(int nr) __iomem u32 *bptr_vaddr; struct device_node *np; int n = 0; + int ioremappable; WARN_ON (nr < 0 || nr >= NR_CPUS); @@ -59,21 +60,37 @@ smp_85xx_kick_cpu(int nr) return; } + /* + * A secondary core could be in a spinloop in the bootpage + * (0xfffff000), somewhere in highmem, or somewhere in lowmem. + * The bootpage and highmem can be accessed via ioremap(), but + * we need to directly access the spinloop if its in lowmem. + */ + ioremappable = *cpu_rel_addr > virt_to_phys(high_memory); + /* Map the spin table */ - bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY); + if (ioremappable) + bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY); + else + bptr_vaddr = phys_to_virt(*cpu_rel_addr); local_irq_save(flags); out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr); out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); + if (!ioremappable) + flush_dcache_range((ulong)bptr_vaddr, + (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY)); + /* Wait a bit for the CPU to ack. 
*/ while ((__secondary_hold_acknowledge != nr) && (++n < 1000)) mdelay(1); local_irq_restore(flags); - iounmap(bptr_vaddr); + if (ioremappable) + iounmap(bptr_vaddr); pr_debug("waited %d msecs for CPU #%d.\n", n, nr); } From a9bb18f36c8056f0712fb28c52c0f85d98438dfb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2010 17:23:47 +0100 Subject: [PATCH 452/640] tracing/kprobes: Fix probe parsing Trying to add a probe like: echo p:myprobe 0x10000 > /sys/kernel/debug/tracing/kprobe_events will fail since the wrong pointer is passed to strict_strtoul when trying to convert the address to an unsigned long. Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <20100210162346.GA6933@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..50b1b8239806 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } /* an address specified */ - ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { pr_info("Failed to parse address.\n"); return ret; From 7f51a100bba517196ac4bdf29408d20ee1c771e8 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 8 Feb 2010 08:30:03 +0100 Subject: [PATCH 453/640] firewire: ohci: retransmit isochronous transmit packets on cycle loss In isochronous transmit DMA descriptors, link the skip address pointer back to the descriptor itself. When a cycle is lost, the controller will send the packet in the next cycle, instead of terminating the entire DMA program. There are two reasons for this: * This behaviour is compatible with the old IEEE1394 stack. 
Old applications would not expect the DMA program to stop in this case. * Since the OHCI driver does not report any uncompleted packets, the context would stop silently; clients would not have any chance to detect and handle this error without a watchdog timer. Signed-off-by: Clemens Ladisch Pieter Palmers notes: "The reason I added this retry behavior to the old stack is because some cards now and then fail to send a packet (e.g. the o2micro card in my dell laptop). I couldn't figure out why exactly this happens, my best guess is that the card cannot fetch the payload data on time. This happens much more frequently when sending large packets, which leads me to suspect that there are some contention issues with the DMA that fills the transmit FIFO. In the old stack it was a pretty critical issue as it resulted in a freeze of the userspace application. The omission of a packet doesn't necessarily have to be an issue. E.g. in IEC61883 streams the DBC field can be used to detect discontinuities in the stream. So as long as the other side doesn't bail when no [packet] is present in a cycle, there is not really a problem. I'm not convinced though that retrying is the proper solution, but it is simple and effective for what it had to do. And I think there are no reasons not to do it this way. Userspace can still detect this by checking the cycle the descriptor was sent in." Signed-off-by: Stefan Richter (changelog, comment) --- drivers/firewire/ohci.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 2345d4103fe6..43ebf337b131 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2101,11 +2101,6 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base, u32 payload_index, payload_end_index, next_page_index; int page, end_page, i, length, offset; - /* - * FIXME: Cycle lost behavior should be configurable: lose - * packet, retransmit or terminate.. 
- */ - p = packet; payload_index = payload; @@ -2135,6 +2130,14 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base, if (!p->skip) { d[0].control = cpu_to_le16(DESCRIPTOR_KEY_IMMEDIATE); d[0].req_count = cpu_to_le16(8); + /* + * Link the skip address to this descriptor itself. This causes + * a context to skip a cycle whenever lost cycles or FIFO + * overruns occur, without dropping the data. The application + * should then decide whether this is an error condition or not. + * FIXME: Make the context's cycle-lost behaviour configurable? + */ + d[0].branch_address = cpu_to_le32(d_bus | z); header = (__le32 *) &d[1]; header[0] = cpu_to_le32(IT_HEADER_SY(p->sy) | From 175359f89df39f4faed663c8cfd6ee0222d2fa1e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Feb 2010 13:13:10 +0100 Subject: [PATCH 454/640] reiserfs: Fix softlockup while waiting on an inode When we wait for an inode through reiserfs_iget(), we hold the reiserfs lock. And waiting for an inode may imply waiting for its writeback. But the inode writeback path may also require the reiserfs lock, which leads to a deadlock. We just need to release the reiserfs lock from reiserfs_iget() to fix this. 
Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Tested-by: Christian Kujau Cc: Chris Mason --- fs/reiserfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9087b10209e6..2df0f5c7c60b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1497,9 +1497,11 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) args.objectid = key->on_disk_key.k_objectid; args.dirid = key->on_disk_key.k_dir_id; + reiserfs_write_unlock(s); inode = iget5_locked(s, key->on_disk_key.k_objectid, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); + reiserfs_write_lock(s); if (!inode) return ERR_PTR(-ENOMEM); From 775c67090c98780b44c4f95d5c05565df715b8bd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 12 Feb 2010 00:17:59 +0100 Subject: [PATCH 455/640] drm: Fix a bug in the range manager. When searching for free space in a range, the function could return a node extending outside of the given range. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index cdec32977129..2ac074c8f5d2 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -405,7 +405,8 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm, wasted += alignment - tmp; } - if (entry->size >= size + wasted) { + if (entry->size >= size + wasted && + (entry->start + wasted + size) <= end) { if (!best_match) return entry; if (entry->size < best_size) { From e22238ea37a870f70e34668a4992bde0c92bba8d Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 12 Feb 2010 00:18:00 +0100 Subject: [PATCH 456/640] drm/ttm: Fix a bug occurring when validating a buffer object in a range.
If the buffer object was already in the requested memory type, but outside of the requested range it was never moved into the requested range. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1a3e909b7bba..c7320ce4567d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1020,6 +1020,12 @@ static int ttm_bo_mem_compat(struct ttm_placement *placement, struct ttm_mem_reg *mem) { int i; + struct drm_mm_node *node = mem->mm_node; + + if (node && placement->lpfn != 0 && + (node->start < placement->fpfn || + node->start + node->size > placement->lpfn)) + return -1; for (i = 0; i < placement->num_placement; i++) { if ((placement->placement[i] & mem->placement & From ce36f00d599e0f988c2a1b7b276d9184ee9c5d82 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sat, 13 Feb 2010 20:20:19 -0500 Subject: [PATCH 457/640] drm/radeon/kms/atom: use get_unaligned_le32() for ctx->ps Noticed on a DEC Alpha. Start up into console mode caused 15 unaligned accesses, and starting X caused another 48. Signed-off-by: Matt Turner CC: Jerome Glisse CC: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index e3b44562d265..2a3df5599ab4 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -24,6 +24,7 @@ #include #include +#include #define ATOM_DEBUG @@ -212,7 +213,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, case ATOM_ARG_PS: idx = U8(*ptr); (*ptr)++; - val = le32_to_cpu(ctx->ps[idx]); + /* get_unaligned_le32 avoids unaligned accesses from atombios + * tables, noticed on a DEC Alpha. 
*/ + val = get_unaligned_le32((u32 *)&ctx->ps[idx]); if (print) DEBUG("PS[0x%02X,0x%04X]", idx, val); break; From 4b505db9c4c72dbd2a8e66b8d681640101325af6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Feb 2010 14:17:45 +0900 Subject: [PATCH 458/640] sh64: fix tracing of signals. This follows the parisc change to ensure that tracehook_signal_handler() is aware of when we are single-stepping in order to ptrace_notify() appropriately. While this was implemented for 32-bit SH, sh64 neglected to make use of TIF_SINGLESTEP when it was folded in with the 32-bit code, resulting in ptrace_notify() never being called. As sh64 uses all of the other abstractions already, this simply plugs in the thread flag in the appropriate enable/disable paths and fixes up the tracehook notification accordingly. With this in place, sh64 is brought in line with what 32-bit is already doing. Reported-by: Mike Frysinger Signed-off-by: Paul Mundt --- arch/sh/kernel/ptrace_64.c | 11 +++++++++-- arch/sh/kernel/signal_64.c | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 873ebdc4f98e..b063eb8b18e3 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -133,6 +133,8 @@ void user_enable_single_step(struct task_struct *child) struct pt_regs *regs = child->thread.uregs; regs->sr |= SR_SSTEP; /* auto-resetting upon exception */ + + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) @@ -140,6 +142,8 @@ void user_disable_single_step(struct task_struct *child) struct pt_regs *regs = child->thread.uregs; regs->sr &= ~SR_SSTEP; + + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } static int genregs_get(struct task_struct *target, @@ -454,6 +458,8 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs) asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) { + int step; + if (unlikely(current->audit_context)) 
audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]), regs->regs[9]); @@ -461,8 +467,9 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->regs[9]); - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); } /* Called with interrupts disabled */ diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index ce76dbdef294..580e97d46ca5 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -118,7 +118,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) * clear the TS_RESTORE_SIGMASK flag. */ current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - tracehook_signal_handler(signr, &info, &ka, regs, 0); + + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLESTEP)); return 1; } } From e803e8b2628f3e9a42f45c5b7bb1f9821b08352c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 15 Feb 2010 15:24:48 +1000 Subject: [PATCH 459/640] drm/radeon/kms: make sure retry count increases. In testing I've never seen it go past 1 retry anyways but better safe than sorry. Reported by Droste on irc. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index b32eeea5bb8b..99915a682d59 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -350,7 +350,7 @@ retry: atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (args.ucReplyStatus && !args.ucDataOutLen) { - if (args.ucReplyStatus == 0x20 && retry_count < 10) + if (args.ucReplyStatus == 0x20 && retry_count++ < 10) goto retry; DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n", req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3], From 0a27fcfaaf61108d94f0377f91bed81b2dd35f52 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Feb 2010 17:05:28 +0100 Subject: [PATCH 460/640] ALSA: hda - Correct ASUS blacklist for MSI brokenness The MSI blacklist entry for ASUS mobo added in the commit 8ce28d6abff34886d3797b25324c940471b99164 was based on the alsa-info output wrongly posted. Fix the id to the right one now.
Reported-by: Sid Boyce Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 3600e9cc9bc6..ff6da6f386d1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2350,7 +2350,7 @@ static void __devinit check_probe_mask(struct azx *chip, int dev) */ static struct snd_pci_quirk msi_black_list[] __devinitdata = { SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */ - SND_PCI_QUIRK(0x1043, 0x829c, "ASUS", 0), /* nvidia */ + SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */ {} }; From d9184fa97b6f48d399636e5e2669bc8419f9369e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 16 Feb 2010 11:14:14 +1000 Subject: [PATCH 461/640] drm/nouveau: use mutex for vbios lock Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bios.c | 7 +++---- drivers/gpu/drm/nouveau/nouveau_bios.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 2cd0fad17dac..0e9cd1d49130 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -5861,13 +5861,12 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table, struct drm_nouveau_private *dev_priv = dev->dev_private; struct nvbios *bios = &dev_priv->VBIOS; struct init_exec iexec = { true, false }; - unsigned long flags; - spin_lock_irqsave(&bios->lock, flags); + mutex_lock(&bios->lock); bios->display.output = dcbent; parse_init_table(bios, table, &iexec); bios->display.output = NULL; - spin_unlock_irqrestore(&bios->lock, flags); + mutex_unlock(&bios->lock); } static bool NVInitVBIOS(struct drm_device *dev) @@ -5876,7 +5875,7 @@ static bool NVInitVBIOS(struct drm_device *dev) struct nvbios *bios = &dev_priv->VBIOS; memset(bios, 0, sizeof(struct nvbios)); - spin_lock_init(&bios->lock); + mutex_init(&bios->lock); bios->dev 
= dev; if (!NVShadowVBIOS(dev, bios->data)) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h index 68446fd4146b..fd94bd6dc264 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.h +++ b/drivers/gpu/drm/nouveau/nouveau_bios.h @@ -205,7 +205,7 @@ struct nvbios { struct drm_device *dev; struct nouveau_bios_info pub; - spinlock_t lock; + struct mutex lock; uint8_t data[NV_PROM_SIZE]; unsigned int length; From bf929efa56ac174bf6d4f54cd6fe811181a51ae5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 11 Feb 2010 12:47:40 +0100 Subject: [PATCH 462/640] drm/nouveau: Force TV encoder DPMS reinit after resume. Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv17_tv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c index 58b917c3341b..21ac6e49b6ee 100644 --- a/drivers/gpu/drm/nouveau/nv17_tv.c +++ b/drivers/gpu/drm/nouveau/nv17_tv.c @@ -579,6 +579,8 @@ static void nv17_tv_restore(struct drm_encoder *encoder) nouveau_encoder(encoder)->restore.output); nv17_tv_state_load(dev, &to_tv_enc(encoder)->saved_state); + + nouveau_encoder(encoder)->last_dpms = NV_DPMS_CLEARED; } static int nv17_tv_create_resources(struct drm_encoder *encoder, From 65d269538a1129495ac45a14a777cd11cfe881d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 Feb 2010 12:19:53 -0500 Subject: [PATCH 463/640] NFS: Too many GETATTR and ACCESS calls after direct I/O The cached read and write paths initialize fattr->time_start in their setup procedures. The value of fattr->time_start is propagated to read_cache_jiffies by nfs_update_inode(). Subsequent calls to nfs_attribute_timeout() will then use a good time stamp when computing the attribute cache timeout, and squelch unneeded GETATTR calls. 
Since the direct I/O paths erroneously leave the inode's fattr->time_start field set to zero, read_cache_jiffies for that inode is set to zero after any direct read or write operation. This triggers an otw GETATTR or ACCESS call to update the file's attribute and access caches properly, even when the NFS READ or WRITE replies have usable post-op attributes. Make sure the direct read and write setup code performs the same fattr initialization as the cached I/O paths to prevent unnecessary GETATTR calls. This was likely introduced by commit 0e574af1 in 2.6.15, which appears to add new nfs_fattr_init() call sites in the cached read and write paths, but not in the equivalent places in fs/nfs/direct.c. A subsequent commit in the same series, 33801147, introduces the fattr->time_start field. Interestingly, the direct write reschedule path already has a call to nfs_fattr_init() in the right place. Reported-by: Quentin Barnes Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e1d415e97849..0d289823e856 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.eof = 0; data->res.count = bytes; + nfs_fattr_init(&data->fattr); msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(data->inode)->commit_setup(data, &msg); @@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.count = bytes; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); task_setup_data.task = &data->task; 
task_setup_data.callback_data = data; From a626e8478b18de4fdee0e6d13975cea2b23efea5 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Thu, 11 Feb 2010 04:13:49 +0000 Subject: [PATCH 464/640] ixgbe: Fix - Do not allow Rx FC on 82598 at 1G due to errata The 82598 has an erratum that receipt of pause frames at 1G could lead to a Tx Hang. To avoid this this patch disables Rx FC while at 1G speed for all 82598 parts. Signed-off-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_82598.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c index 3103f4165311..35a06b47587b 100644 --- a/drivers/net/ixgbe/ixgbe_82598.c +++ b/drivers/net/ixgbe/ixgbe_82598.c @@ -357,12 +357,34 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) u32 fctrl_reg; u32 rmcs_reg; u32 reg; + u32 link_speed = 0; + bool link_up; #ifdef CONFIG_DCB if (hw->fc.requested_mode == ixgbe_fc_pfc) goto out; #endif /* CONFIG_DCB */ + /* + * On 82598 having Rx FC on causes resets while doing 1G + * so if it's on turn it off once we know link_speed. For + * more details see 82598 Specification update. + */ + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) { + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + hw->fc.requested_mode = ixgbe_fc_tx_pause; + break; + case ixgbe_fc_rx_pause: + hw->fc.requested_mode = ixgbe_fc_none; + break; + default: + /* no change */ + break; + } + } + /* Negotiate the fc mode to use */ ret_val = ixgbe_fc_autoneg(hw); if (ret_val) From e86bff0edaa514a63ecd80e1ed2b3472b2507880 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Thu, 11 Feb 2010 04:14:08 +0000 Subject: [PATCH 465/640] ixgbe: fix WOL register setup for 82599 We need to have the WUS register set to all 1's in order for the hardware to be capable of ever waking up. 
Set it here in the ixgbe_probe(). Signed-off-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7b7c8486c0bf..951b73cf5ca2 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -5763,6 +5763,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (err) goto err_sw_init; + /* Make it possible the adapter to be woken up via WOL */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); + /* * If there is a fan on this device and it has failed log the * failure. From f04d5e012d73ea441bd39804ace39fd6d1ce5611 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 2 Feb 2010 14:37:58 -0800 Subject: [PATCH 466/640] thinkpad-acpi: wrong thermal attribute_group removed in thermal_exit() sysfs_remove_group() removed the wrong attribute_group for thermal_read_mode TPEC_8, ACPI_TMP07 and ACPI_UPDT Signed-off-by: Roel Kluin Acked-by: Henrique de Moraes Holschuh Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e67e4feb35cb..eb603f1d55ca 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5771,7 +5771,7 @@ static void thermal_exit(void) case TPACPI_THERMAL_ACPI_TMP07: case TPACPI_THERMAL_ACPI_UPDT: sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, - &thermal_temp_input16_group); + &thermal_temp_input8_group); break; case TPACPI_THERMAL_NONE: default: From 97c169d39b6846a564dc8d883832e7fef9bdb77d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 16 Feb 2010 03:30:06 -0500 Subject: [PATCH 467/640] ACPI: remove Asus P2B-DS from acpi=ht blacklist We realized when we broke acpi=ht 
http://bugzilla.kernel.org/show_bug.cgi?id=14886 that acpi=ht is not needed on this box and folks have been using acpi=force on it anyway. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdfa5ca4..af1c5833ff23 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1342,14 +1342,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), }, }, - { - .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, { .callback = force_acpi_ht, .ident = "ASUS CUR-DLS", From 49c6fb2e41d41c4c0c5c753b6960bc81fe658d20 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 1 Feb 2010 10:35:18 -0700 Subject: [PATCH 468/640] ACPI: dock: properly initialize local struct dock_station in dock_add() Commit fe06fba2 (ACPI: dock: add struct dock_station * directly to platform device data) changed dock_add() to use the platform_device_register_data() API. We passed that interface a stack variable, which is kmemdup'ed and assigned to the device's platform_data pointer. Unfortunately, whatever random garbage is in the stack variable gets copied during the kmemdup, and that leads to broken behavior. Explicitly zero out the structure before passing it to the API.
This fixes the T41 docking button issue: http://bugzilla.kernel.org/show_bug.cgi?id=15000 Cc: stable@kernel.org Reported-by: Chris Mason Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/dock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index bbc2c1315c47..b2586f57e1f5 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -935,6 +935,7 @@ static int dock_add(acpi_handle handle) struct platform_device *dd; id = dock_station_count; + memset(&ds, 0, sizeof(ds)); dd = platform_device_register_data(NULL, "dock", id, &ds, sizeof(ds)); if (IS_ERR(dd)) return PTR_ERR(dd); From 370d5cd88509b93b76eb2f5f97efbd71c25061cb Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 27 Jan 2010 15:25:39 -0800 Subject: [PATCH 469/640] ACPI: fix High cpu temperature with 2.6.32 Since the rewrite of the CPU idle governor in 2.6.32, two laptops have surfaced where the BIOS advertises a C2 power state, but for some reason this state is not functioning (as verified in both cases by powertop before the patch in .32). The old governor had the accidental behavior that if a non-working state was chosen too many times, it would end up falling back to C1. The new governor works differently and this accidental behavior is no longer there; the result is a high temperature on these two machines. This patch adds these 2 machines to the DMI table for C state anomalies; by just not using C2 both these machines are better off (the TSC can be used instead of the pm timer, giving a performance boost for example). 
Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14742 Signed-off-by: Arjan van de Ven Reported-by: Cc: Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 7c0441f63b39..e88e8ae04fdb 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -110,6 +110,14 @@ static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")}, (void *)2}, + { set_max_cstate, "Pavilion zv5000", { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME,"Pavilion zv5000 (DS502A#ABA)")}, + (void *)1}, + { set_max_cstate, "Asus L8400B", { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), + DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")}, + (void *)1}, {}, }; From 1379d2fef0ec07c7027a5e89036025ce761470c8 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 16 Feb 2010 04:16:55 -0500 Subject: [PATCH 470/640] ACPI, i915: blacklist Clevo M5x0N bad_lid state Wrong Lid state reported. Need to blacklist this machine for LVDS detection. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- drivers/gpu/drm/i915/intel_lvds.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b1d0acbae4e4..c2e8a45780d5 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -636,6 +636,13 @@ static const struct dmi_system_id bad_lid_status[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PC-81005"), }, }, + { + .ident = "Clevo M5x0N", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CLEVO Co."), + DMI_MATCH(DMI_BOARD_NAME, "M5x0N"), + }, + }, { } }; From 1252f238db48ec419f40c1bdf30fda649860eed9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:02:13 +0100 Subject: [PATCH 471/640] x86: set_personality_ia32() misses force_personality32 05d43ed8a "x86: get rid of the insane TIF_ABI_PENDING bit" forgot about force_personality32. Fix. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 41a26a82470a..126f0b493d04 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -527,6 +527,7 @@ void set_personality_ia32(void) /* Make sure to be in 32bit mode */ set_thread_flag(TIF_IA32); + current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; From 11557b24fdec13cb1c3d5f681688401a651ed54e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:24:01 +0100 Subject: [PATCH 472/640] x86: ELF_PLAT_INIT() shouldn't worry about TIF_IA32 The 64-bit version of ELF_PLAT_INIT() clears TIF_IA32, but at this point it has already been cleared by SET_PERSONALITY == set_personality_64bit. 
Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/include/asm/elf.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1994d3f58443..f2ad2163109d 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t, } #define ELF_PLAT_INIT(_r, load_addr) \ -do { \ - elf_common_init(&current->thread, _r, 0); \ - clear_thread_flag(TIF_IA32); \ -} while (0) + elf_common_init(&current->thread, _r, 0) #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(&current->thread, regs, __USER_DS) From 781248c1b50c776a9ef4be1130f84ced1cba42fe Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 16 Feb 2010 18:42:47 +0000 Subject: [PATCH 473/640] dm stripe: avoid divide by zero with invalid stripe count If a table containing zero as stripe count is passed into stripe_ctr the code attempts to divide by zero. This patch changes DM_TABLE_LOAD to return -EINVAL if the stripe count is zero.
We now get the following error messages: device-mapper: table: 253:0: striped: Invalid stripe count device-mapper: ioctl: error adding target to table Signed-off-by: Nikanth Karthikesan Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e0efc1adcaff..bd58703ee8f6 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -110,7 +110,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } stripes = simple_strtoul(argv[0], &end, 10); - if (*end) { + if (!stripes || *end) { ti->error = "Invalid stripe count"; return -EINVAL; } From 55f67f2dedec1e3049abc30b6d82b999a14cafb7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 16 Feb 2010 18:42:51 +0000 Subject: [PATCH 474/640] dm snapshot: persistent annotate work_queue as on stack chunk_io() declares its 'struct mdata_req' on the stack and then initializes its 'struct work_struct' member. Annotate the initialization of this workqueue with INIT_WORK_ON_STACK to suppress a debugobjects warning seen when CONFIG_DEBUG_OBJECTS_WORK is enabled. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap-persistent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 7d08879689ac..c097d8a4823d 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -254,7 +254,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, * Issue the synchronous I/O from a different thread * to avoid generic_make_request recursion. 
*/ - INIT_WORK(&req.work, do_metadata); + INIT_WORK_ON_STACK(&req.work, do_metadata); queue_work(ps->metadata_wq, &req.work); flush_workqueue(ps->metadata_wq); From ebfd32bba9b518d684009d9d21a56742337ca1b3 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 16 Feb 2010 18:42:53 +0000 Subject: [PATCH 475/640] dm log: userspace fix overhead_size calculations This patch fixes two bugs that revolve around the miscalculation and misuse of the variable 'overhead_size'. 'overhead_size' is the size of the various header structures used during communication. The first bug is the use of 'sizeof' with the pointer of a structure instead of the structure itself - resulting in the wrong size being computed. This is then used in a check to see if the payload (data_size) would be too large for the preallocated structure. Since the bug produces a smaller value for the overhead, it was possible for the structure to be breached. (Although the current users of the code do not currently send enough data to trigger this bug.) The second bug is that the 'overhead_size' value is used to compute how much of the preallocated space should be cleared before populating it with fresh data. This should have simply been 'sizeof(struct cn_msg)' not overhead_size. The fact that 'overhead_size' was computed incorrectly made this problem "less bad" - leaving only a pointer's worth of space at the end uncleared. Thus, this bug was never producing a bad result, but still needs to be fixed - especially now that the value is computed correctly.
Cc: stable@kernel.org Signed-off-by: Jonathan Brassow --- drivers/md/dm-log-userspace-transfer.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 54abf9e303b7..f1c8cae70b4b 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -172,11 +172,15 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, { int r = 0; size_t dummy = 0; - int overhead_size = - sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg); + int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg); struct dm_ulog_request *tfr = prealloced_ulog_tfr; struct receiving_pkg pkg; + /* + * Given the space needed to hold the 'struct cn_msg' and + * 'struct dm_ulog_request' - do we have enough payload + * space remaining? + */ if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) { DMINFO("Size of tfr exceeds preallocated size"); return -EINVAL; @@ -191,7 +195,7 @@ resend: */ mutex_lock(&dm_ulog_lock); - memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size); + memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg)); memcpy(tfr->uuid, uuid, DM_UUID_LEN); tfr->luid = luid; tfr->seq = dm_ulog_seq++; From 5528d17de1cf1462f285c40ccaf8e0d0e4c64dc0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 16 Feb 2010 18:42:55 +0000 Subject: [PATCH 476/640] dm raid1: fail writes if errors are not handled and log fails If the mirror log fails when the handle_errors option was not selected and there is no remaining valid mirror leg, writes return success even though they weren't actually written to any device. This patch completes them with EIO instead. 
This code path is taken: do_writes: bio_list_merge(&ms->failures, &sync); do_failures: if (!get_valid_mirror(ms)) (false) else if (errors_handled(ms)) (false) else bio_endio(bio, 0); The logic in do_failures is based on presuming that the write was already tried: if it succeeded at least on one leg (without handle_errors) it is reported as success. Reference: https://bugzilla.redhat.com/show_bug.cgi?id=555197 Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ad779bd13aec..6c1046df81f6 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -724,7 +724,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) /* * Dispatch io. */ - if (unlikely(ms->log_failure)) { + if (unlikely(ms->log_failure) && errors_handled(ms)) { spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, &sync); spin_unlock_irq(&ms->lock); From 558569aa9d83e016295bac77d900342908d7fd85 Mon Sep 17 00:00:00 2001 From: Takahiro Yasui Date: Tue, 16 Feb 2010 18:42:58 +0000 Subject: [PATCH 477/640] dm raid1: fix null pointer dereference in suspend When suspending a failed mirror, bios are completed by mirror_end_io() and __rh_lookup() in dm_rh_dec() returns NULL where a non-NULL return value is required by design. Fix this by not changing the state of the recovery failed region from DM_RH_RECOVERING to DM_RH_NOSYNC in dm_rh_recovery_end(). Issue On 2.6.33-rc1 kernel, I hit the bug when I suspended the failed mirror by dmsetup command. BUG: unable to handle kernel NULL pointer dereference at 00000020 IP: [] dm_rh_dec+0x35/0xa1 [dm_region_hash] ... 
EIP: 0060:[] EFLAGS: 00010046 CPU: 0 EIP is at dm_rh_dec+0x35/0xa1 [dm_region_hash] EAX: 00000286 EBX: 00000000 ECX: 00000286 EDX: 00000000 ESI: eff79eac EDI: eff79e80 EBP: f6915cd4 ESP: f6915cc4 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process dmsetup (pid: 2849, ti=f6914000 task=eff03e80 task.ti=f6914000) ... Call Trace: [] ? mirror_end_io+0x53/0x1b1 [dm_mirror] [] ? clone_endio+0x4d/0xa2 [dm_mod] [] ? mirror_end_io+0x0/0x1b1 [dm_mirror] [] ? clone_endio+0x0/0xa2 [dm_mod] [] ? bio_endio+0x28/0x2b [] ? hold_bio+0x2d/0x62 [dm_mirror] [] ? mirror_presuspend+0xeb/0xf7 [dm_mirror] [] ? vmap_page_range+0xb/0xd [] ? suspend_targets+0x2d/0x3b [dm_mod] [] ? dm_table_presuspend_targets+0xe/0x10 [dm_mod] [] ? dm_suspend+0x4d/0x150 [dm_mod] [] ? dev_suspend+0x55/0x18a [dm_mod] [] ? _copy_from_user+0x42/0x56 [] ? dm_ctl_ioctl+0x22c/0x281 [dm_mod] [] ? dev_suspend+0x0/0x18a [dm_mod] [] ? dm_ctl_ioctl+0x0/0x281 [dm_mod] [] ? vfs_ioctl+0x22/0x85 [] ? do_vfs_ioctl+0x4cb/0x516 [] ? sys_ioctl+0x40/0x5a [] ? sysenter_do_call+0x12/0x28 Analysis When recovery process of a region failed, dm_rh_recovery_end() function changes the state of the region from DM_RH_RECOVERING to DM_RH_NOSYNC. When recovery_complete() is executed between dm_rh_update_states() and do_writes() in do_mirror(), bios are processed with the region state, DM_RH_NOSYNC. However, the region data is freed without checking its pending count when dm_rh_update_states() is called next time. When bios are finished by mirror_end_io(), __rh_lookup() in dm_rh_dec() returns NULL even though a valid return value is expected. Solution Remove the state change of the recovery failed region from DM_RH_RECOVERING to DM_RH_NOSYNC in dm_rh_recovery_end(). We can remove the state change because: - If the region data has been released by dm_rh_update_states(), a new region data is created with the state of DM_RH_NOSYNC, and bios are processed according to the DM_RH_NOSYNC state.
- If the region data has not been released by dm_rh_update_states(), a state of the region is DM_RH_RECOVERING and bios are put in the delayed_bio list. The flag change from DM_RH_RECOVERING to DM_RH_NOSYNC in dm_rh_recovery_end() was added in the following commit: dm raid1: handle resync failures author Jonathan Brassow Thu, 12 Jul 2007 16:29:04 +0000 (17:29 +0100) http://git.kernel.org/linus/f44db678edcc6f4c2779ac43f63f0b9dfa28b724 Signed-off-by: Takahiro Yasui Reviewed-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-region-hash.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 5f19ceb6fe91..168bd38f5006 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -660,10 +660,9 @@ void dm_rh_recovery_end(struct dm_region *reg, int success) spin_lock_irq(&rh->region_lock); if (success) list_add(&reg->list, &reg->rh->recovered_regions); - else { - reg->state = DM_RH_NOSYNC; + else list_add(&reg->list, &reg->rh->failed_recovered_regions); - } + spin_unlock_irq(&rh->region_lock); rh->wakeup_workers(rh->context); From 9eef87da2a8ea4920e0d913ff977cac064b68ee0 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Tue, 16 Feb 2010 18:43:01 +0000 Subject: [PATCH 478/640] dm mpath: fix stall when requeueing io This patch fixes the problem that system may stall if target's ->map_rq returns DM_MAPIO_REQUEUE in map_request(). E.g. stall happens on 1 CPU box when a dm-mpath device with queue_if_no_path bounces between all-paths-down and paths-up on I/O load. When target's ->map_rq returns DM_MAPIO_REQUEUE, map_request() requeues the request and returns to dm_request_fn(). Then, dm_request_fn() doesn't exit the I/O dispatching loop and continues processing the requeued request again. This map and requeue loop can be done with interrupt disabled, so 1 CPU system can be stalled if this situation happens.
For example, commands below can stall my 1 CPU box within 1 minute or so: # dmsetup table mp mp: 0 2097152 multipath 1 queue_if_no_path 0 1 1 service-time 0 1 2 8:144 1 1 # while true; do dd if=/dev/mapper/mp of=/dev/null bs=1M count=100; done & # while true; do \ > dmsetup message mp 0 "fail_path 8:144" \ > dmsetup suspend --noflush mp \ > dmsetup resume mp \ > dmsetup message mp 0 "reinstate_path 8:144" \ > done To fix the problem above, this patch changes dm_request_fn() to exit the I/O dispatching loop once if a request is requeued in map_request(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3167480b532c..aa4e2aa86d49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1595,10 +1595,15 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) return BLKPREP_OK; } -static void map_request(struct dm_target *ti, struct request *clone, - struct mapped_device *md) +/* + * Returns: + * 0 : the request has been processed (not requeued) + * !0 : the request has been requeued + */ +static int map_request(struct dm_target *ti, struct request *clone, + struct mapped_device *md) { - int r; + int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; /* @@ -1625,6 +1630,7 @@ static void map_request(struct dm_target *ti, struct request *clone, case DM_MAPIO_REQUEUE: /* The target wants to requeue the I/O */ dm_requeue_unmapped_request(clone); + requeued = 1; break; default: if (r > 0) { @@ -1636,6 +1642,8 @@ static void map_request(struct dm_target *ti, struct request *clone, dm_kill_unmapped_request(clone, r); break; } + + return requeued; } /* @@ -1677,12 +1685,17 @@ static void dm_request_fn(struct request_queue *q) atomic_inc(&md->pending[rq_data_dir(clone)]); spin_unlock(q->queue_lock); - map_request(ti, clone, md); + if 
(map_request(ti, clone, md)) + goto requeued; + spin_lock_irq(q->queue_lock); } goto out; +requeued: + spin_lock_irq(q->queue_lock); + plug_and_out: if (!elv_queue_empty(q)) /* Some requests still remain, retry later */ From 9307f6b19ac4f5887552b5b2992f391b866f7633 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 16 Feb 2010 18:43:04 +0000 Subject: [PATCH 479/640] dm: sysfs revert add empty release function to avoid debug warning Revert commit d2bb7df8cac647b92f51fb84ae735771e7adbfa7 at Greg's request. Author: Milan Broz Date: Thu Dec 10 23:51:53 2009 +0000 dm: sysfs add empty release function to avoid debug warning This patch just removes an unnecessary warning: kobject: 'dm': does not have a release() function, it is broken and must be fixed. The kobject is embedded in mapped device struct, so code does not need to release memory explicitly here. Cc: Greg KH Signed-off-by: Alasdair G Kergon --- drivers/md/dm-sysfs.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index f53392df7b97..f91b40942e07 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -79,13 +79,6 @@ static struct sysfs_ops dm_sysfs_ops = { .show = dm_attr_show, }; -/* - * The sysfs structure is embedded in md struct, nothing to do here - */ -static void dm_sysfs_release(struct kobject *kobj) -{ -} - /* * dm kobject is embedded in mapped_device structure * no need to define release function here @@ -93,7 +86,6 @@ static void dm_sysfs_release(struct kobject *kobj) static struct kobj_type dm_ktype = { .sysfs_ops = &dm_sysfs_ops, .default_attrs = dm_attrs, - .release = dm_sysfs_release }; /* From 1cab819b5e244e1b853c7b440981e6a960da3bfb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 11 Feb 2010 13:48:29 +0000 Subject: [PATCH 480/640] ethtool: allow non-admin user to read GRO settings. Looks like an oversight in GRO design. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8aee584e8d1..236a9988ea91 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -927,6 +927,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: From 10e7454ed7a2da39f1f6255f63d7df27ab4bb67f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 19:24:30 +0000 Subject: [PATCH 481/640] ipcomp: Avoid duplicate calls to ipcomp_destroy When ipcomp_tunnel_attach fails we will call ipcomp_destroy twice. This may lead to double-frees on certain structures. As there is no reason to explicitly call ipcomp_destroy, this patch removes it from ipcomp*.c and lets the standard xfrm_state destruction take place. This is based on the discovery and patch by Alexey Dobriyan. Tested-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/ipcomp.c | 6 +----- net/ipv6/ipcomp6.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 38fbf04150ae..544ce0876f12 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -124,16 +124,12 @@ static int ipcomp4_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; - -error_tunnel: - ipcomp_destroy(x); - goto out; } static const struct xfrm_type ipcomp_type = { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..002e6eef9120 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -154,16 +154,12 @@ static int ipcomp6_init_state(struct xfrm_state *x) if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) - goto error_tunnel; + goto out; } err = 0; out: return err; -error_tunnel: - ipcomp_destroy(x); - - goto out; } static const struct xfrm_type ipcomp6_type = From 553f9118abc4fc53674fff87f6fe5fa3f56a41ed Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Feb 2010 20:00:51 +0000 Subject: [PATCH 482/640] xfrm: Fix xfrm_state_clone leak xfrm_state_clone calls kfree instead of xfrm_state_put to free a failed state. Depending on the state of the failed state, it can cause leaks to things like module references. All states should be freed by xfrm_state_put past the point of xfrm_init_state. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b36cc344474b..f445ea1c5f52 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1102,7 +1102,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) int err = -ENOMEM; struct xfrm_state *x = xfrm_state_alloc(net); if (!x) - goto error; + goto out; memcpy(&x->id, &orig->id, sizeof(x->id)); memcpy(&x->sel, &orig->sel, sizeof(x->sel)); @@ -1160,16 +1160,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) return x; error: + xfrm_state_put(x); +out: if (errp) *errp = err; - if (x) { - kfree(x->aalg); - kfree(x->ealg); - kfree(x->calg); - kfree(x->encap); - kfree(x->coaddr); - } - kfree(x); return NULL; } From 07793d33b4fba00f5bd1dac78fa038bb0e23fa5c Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 16 Feb 2010 00:18:46 +0000 Subject: [PATCH 483/640] be2net: set proper value to version field in req hdr Before sending a command to the ASIC, set version properly. This is necessary for the ARM firmware to send correct data to the driver. This also fixes a bug in certain SKUs of the ASIC where the statistics are misreported. Signed-off-by: Ajit Khaparde Signed-off-by: David S. 
Miller --- drivers/net/benet/be_cmds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c index fee6eee7ae5b..006cb2efcd22 100644 --- a/drivers/net/benet/be_cmds.c +++ b/drivers/net/benet/be_cmds.c @@ -296,6 +296,7 @@ static void be_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr, req_hdr->opcode = opcode; req_hdr->subsystem = subsystem; req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr)); + req_hdr->version = 0; } static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages, From d4a4683ca054ed9917dfc9e3ff0f7ecf74ad90d6 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Mon, 15 Feb 2010 09:37:46 -0800 Subject: [PATCH 484/640] USB: usbfs: only copy the actual data received We need to only copy the data received by the device to userspace, not the whole kernel buffer, which can contain "stale" data. Thanks to Marcus Meissner for pointing this out and testing the fix. Reported-by: Marcus Meissner Tested-by: Marcus Meissner Cc: Alan Stern Cc: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6e8bcdfd23b4..ca948bbc388f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1312,9 +1312,9 @@ static int processcompl(struct async *as, void __user * __user *arg) void __user *addr = as->userurb; unsigned int i; - if (as->userbuffer) + if (as->userbuffer && urb->actual_length) if (copy_to_user(as->userbuffer, urb->transfer_buffer, - urb->transfer_buffer_length)) + urb->actual_length)) goto err_out; if (put_user(as->status, &userurb->status)) goto err_out; @@ -1475,9 +1475,9 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) void __user *addr = as->userurb; unsigned int i; - if (as->userbuffer) + if (as->userbuffer && urb->actual_length) if (copy_to_user(as->userbuffer, 
urb->transfer_buffer, - urb->transfer_buffer_length)) + urb->actual_length)) return -EFAULT; if (put_user(as->status, &userurb->status)) return -EFAULT; From ddeee0b2eec2a51b0712b04de4b39e7bec892a53 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 16 Feb 2010 12:35:07 -0800 Subject: [PATCH 485/640] USB: usbfs: properly clean up the as structure on error paths I notice that the processcompl_compat() function seems to be leaking the 'struct async *as' in the error paths. I think that the calling convention is fundamentally buggered. The caller is the one that did the "reap_as()" to get the as thing, the caller should be the one to free it too. Freeing it in the caller also means that it very clearly always gets freed, and avoids the need for any "free in the error case too". From: Linus Torvalds Cc: Alan Stern Cc: Marcus Meissner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ca948bbc388f..a678186f218f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1334,14 +1334,11 @@ static int processcompl(struct async *as, void __user * __user *arg) } } - free_async(as); - if (put_user(addr, (void __user * __user *)arg)) return -EFAULT; return 0; err_out: - free_async(as); return -EFAULT; } @@ -1371,8 +1368,11 @@ static struct async *reap_as(struct dev_state *ps) static int proc_reapurb(struct dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); - if (as) - return processcompl(as, (void __user * __user *)arg); + if (as) { + int retval = processcompl(as, (void __user * __user *)arg); + free_async(as); + return retval; + } if (signal_pending(current)) return -EINTR; return -EIO; @@ -1380,11 +1380,16 @@ static int proc_reapurb(struct dev_state *ps, void __user *arg) static int proc_reapurbnonblock(struct dev_state *ps, void __user *arg) { + int 
retval; struct async *as; - if (!(as = async_getcompleted(ps))) - return -EAGAIN; - return processcompl(as, (void __user * __user *)arg); + as = async_getcompleted(ps); + retval = -EAGAIN; + if (as) { + retval = processcompl(as, (void __user * __user *)arg); + free_async(as); + } + return retval; } #ifdef CONFIG_COMPAT @@ -1497,7 +1502,6 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) } } - free_async(as); if (put_user(ptr_to_compat(addr), (u32 __user *)arg)) return -EFAULT; return 0; @@ -1506,8 +1510,11 @@ static int processcompl_compat(struct async *as, void __user * __user *arg) static int proc_reapurb_compat(struct dev_state *ps, void __user *arg) { struct async *as = reap_as(ps); - if (as) - return processcompl_compat(as, (void __user * __user *)arg); + if (as) { + int retval = processcompl_compat(as, (void __user * __user *)arg); + free_async(as); + return retval; + } if (signal_pending(current)) return -EINTR; return -EIO; @@ -1515,11 +1522,16 @@ static int proc_reapurb_compat(struct dev_state *ps, void __user *arg) static int proc_reapurbnonblock_compat(struct dev_state *ps, void __user *arg) { + int retval; struct async *as; - if (!(as = async_getcompleted(ps))) - return -EAGAIN; - return processcompl_compat(as, (void __user * __user *)arg); + retval = -EAGAIN; + as = async_getcompleted(ps); + if (as) { + retval = processcompl_compat(as, (void __user * __user *)arg); + free_async(as); + } + return retval; } From a7787e508acb4378d62f4584bae3dd1cd0ba3eac Mon Sep 17 00:00:00 2001 From: Radek Liboska Date: Wed, 27 Jan 2010 15:38:34 +0100 Subject: [PATCH 486/640] USB: ftdi_sio: new device id for papouch AD4USB added new device pid (PAPOUCH_AD4USB_PID) to ftdi_sio.h and ftdi_sio.c AD4USB measuring converter is a 4-input A/D converter which enables the user to measure to four current inputs ranging from 0(4) to 20 mA or voltage between 0 and 10 V. The measured values are then transferred to a superior system in digital form. 
The AD4USB communicates via USB and is also powered via USB. The datasheet in English is here: http://www.papouch.com/shop/scripts/pdf/ad4usb_en.pdf Signed-off-by: Radek Liboska --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 216f187582ab..f6d85660a446 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -697,6 +697,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO4x4_PID) }, + { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AD4USB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DGQG_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) }, { USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index da92b4952ffb..2478130672ef 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -968,6 +968,7 @@ #define PAPOUCH_VID 0x5050 /* Vendor ID */ #define PAPOUCH_TMU_PID 0x0400 /* TMU USB Thermometer */ #define PAPOUCH_QUIDO4x4_PID 0x0900 /* Quido 4/4 Module */ +#define PAPOUCH_AD4USB_PID 0x8003 /* AD4USB Measurement Module */ /* * Marvell SheevaPlug From 39232b3d8046eace9985fd898b763c585f989099 Mon Sep 17 00:00:00 2001 From: Phil Dibowitz Date: Sat, 16 Jan 2010 19:52:17 +0100 Subject: [PATCH 487/640] USB: storage: Remove unneeded SC/PR from unusual_devs.h This patch removes the subclass and protocol entries from a Microtech entry in unusual_devs.h. This was reported by . Greg, please apply. 
Signed-off-by: Phil Dibowitz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c932f9053188..49575fba3756 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -941,7 +941,7 @@ UNUSUAL_DEV( 0x07ab, 0xfccd, 0x0000, 0x9999, UNUSUAL_DEV( 0x07af, 0x0004, 0x0100, 0x0133, "Microtech", "USB-SCSI-DB25", - US_SC_SCSI, US_PR_BULK, usb_stor_euscsi_init, + US_SC_DEVICE, US_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), UNUSUAL_DEV( 0x07af, 0x0005, 0x0100, 0x0100, From 65e1ec6751b3eefee6d94161185e78736366126f Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Sun, 17 Jan 2010 11:45:38 +0100 Subject: [PATCH 488/640] USB: ftdi_sio: add device IDs (several ELV, one Mindstorms NXT) - add FTDI device IDs for several ELV devices and NXTCam of Lego Mindstorms NXT - add hopefully helpful new_id comment - remove less helpful "Due to many user requests for multiple ELV devices we enable them by default." comment (we simply add _all_ known devices - an enduser shouldn't have to fiddle with obscure module parameters...). - add myself to DRIVER_AUTHOR The missing NXTCam ID has been found at http://www.unixboard.de/vb3/showthread.php?t=44155 , ELV devices taken from ELV Windows .inf file. 
Signed-off-by: Andreas Mohr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 24 +++++++++++++++++++++--- drivers/usb/serial/ftdi_sio_ids.h | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index f6d85660a446..7638828e7317 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -50,7 +50,7 @@ * Version Information */ #define DRIVER_VERSION "v1.5.0" -#define DRIVER_AUTHOR "Greg Kroah-Hartman , Bill Ryder , Kuba Ober " +#define DRIVER_AUTHOR "Greg Kroah-Hartman , Bill Ryder , Kuba Ober , Andreas Mohr" #define DRIVER_DESC "USB FTDI Serial Converters Driver" static int debug; @@ -145,10 +145,15 @@ static struct ftdi_sio_quirk ftdi_HE_TIRA1_quirk = { +/* + * Device ID not listed? Test via module params product/vendor or + * /sys/bus/usb/ftdi_sio/new_id, then send patch/report! + */ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) }, @@ -552,9 +557,16 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) }, /* - * Due to many user requests for multiple ELV devices we enable - * them by default. 
+ * ELV devices: */ + { USB_DEVICE(FTDI_VID, FTDI_ELV_USR_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_MSM1_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_KL100_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_WS550_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_EC3000_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_WS888_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_TWS550_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_FEM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CLI7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PPS7330_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFM100_PID) }, @@ -571,11 +583,17 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_ELV_PCK100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_RFP500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FS20SIG_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_UTP8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS300PC_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_WS444PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1010PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_HS485_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_UMS100_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 2478130672ef..c8951aeed983 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -38,6 +38,8 @@ /* www.candapter.com Ewert Energy Systems CANdapter device */ #define FTDI_CANDAPTER_PID 0x9F80 /* Product Id */ +#define FTDI_NXTCAM_PID 0xABB8 /* NXTCam for Mindstorms NXT */ + /* OOCDlink by Joern Kaipf * (http://www.joernonline.de/dw/doku.php?id=start&idx=projects:oocdlink) */ #define FTDI_OOCDLINK_PID 0xbaf8 /* Amontec JTAGkey */ @@ -161,22 
+163,37 @@ /* * ELV USB devices submitted by Christian Abt of ELV (www.elv.de). * All of these devices use FTDI's vendor ID (0x0403). + * Further IDs taken from ELV Windows .inf file. * * The previously included PID for the UO 100 module was incorrect. * In fact, that PID was for ELV's UR 100 USB-RS232 converter (0xFB58). * * Armin Laeuger originally sent the PID for the UM 100 module. */ +#define FTDI_ELV_USR_PID 0xE000 /* ELV Universal-Sound-Recorder */ +#define FTDI_ELV_MSM1_PID 0xE001 /* ELV Mini-Sound-Modul */ +#define FTDI_ELV_KL100_PID 0xE002 /* ELV Kfz-Leistungsmesser KL 100 */ +#define FTDI_ELV_WS550_PID 0xE004 /* WS 550 */ +#define FTDI_ELV_EC3000_PID 0xE006 /* ENERGY CONTROL 3000 USB */ +#define FTDI_ELV_WS888_PID 0xE008 /* WS 888 */ +#define FTDI_ELV_TWS550_PID 0xE009 /* Technoline WS 550 */ +#define FTDI_ELV_FEM_PID 0xE00A /* Funk Energie Monitor */ #define FTDI_ELV_FHZ1300PC_PID 0xE0E8 /* FHZ 1300 PC */ #define FTDI_ELV_WS500_PID 0xE0E9 /* PC-Wetterstation (WS 500) */ #define FTDI_ELV_HS485_PID 0xE0EA /* USB to RS-485 adapter */ +#define FTDI_ELV_UMS100_PID 0xE0EB /* ELV USB Master-Slave Schaltsteckdose UMS 100 */ +#define FTDI_ELV_TFD128_PID 0xE0EC /* ELV Temperatur-Feuchte-Datenlogger TFD 128 */ +#define FTDI_ELV_FM3RX_PID 0xE0ED /* ELV Messwertuebertragung FM3 RX */ +#define FTDI_ELV_WS777_PID 0xE0EE /* Conrad WS 777 */ #define FTDI_ELV_EM1010PC_PID 0xE0EF /* Engery monitor EM 1010 PC */ #define FTDI_ELV_CSI8_PID 0xE0F0 /* Computer-Schalt-Interface (CSI 8) */ #define FTDI_ELV_EM1000DL_PID 0xE0F1 /* PC-Datenlogger fuer Energiemonitor (EM 1000 DL) */ #define FTDI_ELV_PCK100_PID 0xE0F2 /* PC-Kabeltester (PCK 100) */ #define FTDI_ELV_RFP500_PID 0xE0F3 /* HF-Leistungsmesser (RFP 500) */ #define FTDI_ELV_FS20SIG_PID 0xE0F4 /* Signalgeber (FS 20 SIG) */ +#define FTDI_ELV_UTP8_PID 0xE0F5 /* ELV UTP 8 */ #define FTDI_ELV_WS300PC_PID 0xE0F6 /* PC-Wetterstation (WS 300 PC) */ +#define FTDI_ELV_WS444PC_PID 0xE0F7 /* Conrad WS 444 PC */ #define 
FTDI_PHI_FISCO_PID 0xE40B /* PHI Fisco USB to Serial cable */ #define FTDI_ELV_UAD8_PID 0xF068 /* USB-AD-Wandler (UAD 8) */ #define FTDI_ELV_UDA7_PID 0xF069 /* USB-DA-Wandler (UDA 7) */ From ae3a07924f0a31b96d52bf16bdf1713445a5a414 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 18 Jan 2010 12:03:18 +0000 Subject: [PATCH 489/640] USB: r8a66597-udc: Prototype IS_ERR() and PTR_ERR() The build of r8a66597-udc was failing on ARM since IS_ERR() and PTR_ERR() weren't prototyped. Presumably err.h is being pulled in by another header on other platforms. Signed-off-by: Mark Brown Acked-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/r8a66597-udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c index e220fb8091a3..8b45145b9136 100644 --- a/drivers/usb/gadget/r8a66597-udc.c +++ b/drivers/usb/gadget/r8a66597-udc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include From 6feb63b69f4f6e876ea5a2edc6119b8e7ac90102 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 18 Jan 2010 13:18:34 +0000 Subject: [PATCH 490/640] USB: s3c-hsotg: Export usb_gadget_register_driver() USB gadget controller drivers normally export their driver registration function, allowing modular builds of the individual gadget drivers so do so for s3c-hsotg, fixing builds. 
Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/s3c-hsotg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c index 4b5dbd0127f5..5fc80a104150 100644 --- a/drivers/usb/gadget/s3c-hsotg.c +++ b/drivers/usb/gadget/s3c-hsotg.c @@ -2582,6 +2582,7 @@ err: hsotg->gadget.dev.driver = NULL; return ret; } +EXPORT_SYMBOL(usb_gadget_register_driver); int usb_gadget_unregister_driver(struct usb_gadget_driver *driver) { From b9df794258de24d10b0616634d4c30d8b6e9d381 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Tue, 19 Jan 2010 16:31:31 +0800 Subject: [PATCH 491/640] USB: ehci: phy low power mode bug fixing 1. There are two msleep calls inside two spin lock sections, need to unlock and lock again after msleep. 2. Save a extra status reg setting. Signed-off-by: Alek Du Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index c75d9270c752..19372673bf09 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -196,7 +196,9 @@ static int ehci_bus_suspend (struct usb_hcd *hcd) if (hostpc_reg) { u32 t3; + spin_unlock_irq(&ehci->lock); msleep(5);/* 5ms for HCD enter low pwr mode */ + spin_lock_irq(&ehci->lock); t3 = ehci_readl(ehci, hostpc_reg); ehci_writel(ehci, t3 | HOSTPC_PHCD, hostpc_reg); t3 = ehci_readl(ehci, hostpc_reg); @@ -904,17 +906,18 @@ static int ehci_hub_control ( if ((temp & PORT_PE) == 0 || (temp & PORT_RESET) != 0) goto error; - ehci_writel(ehci, temp | PORT_SUSPEND, status_reg); + /* After above check the port must be connected. 
* Set appropriate bit thus could put phy into low power * mode if we have hostpc feature */ + temp &= ~PORT_WKCONN_E; + temp |= PORT_WKDISC_E | PORT_WKOC_E; + ehci_writel(ehci, temp | PORT_SUSPEND, status_reg); if (hostpc_reg) { - temp &= ~PORT_WKCONN_E; - temp |= (PORT_WKDISC_E | PORT_WKOC_E); - ehci_writel(ehci, temp | PORT_SUSPEND, - status_reg); + spin_unlock_irqrestore(&ehci->lock, flags); msleep(5);/* 5ms for HCD enter low pwr mode */ + spin_lock_irqsave(&ehci->lock, flags); temp1 = ehci_readl(ehci, hostpc_reg); ehci_writel(ehci, temp1 | HOSTPC_PHCD, hostpc_reg); From bbcb8bbad52b8795912e8f02c2b319092b96078e Mon Sep 17 00:00:00 2001 From: Tanaka Akira Date: Thu, 21 Jan 2010 02:31:09 +0900 Subject: [PATCH 492/640] USB: SIS USB2VGA DRIVER: support KAIREN's USB VGA adaptor USB20SVGA-MB-PLUS This patch adds the USB product ID of KAIREN's USB VGA Adaptor, USB20SVGA-MB-PLUS, to sisusbvga work with it. Signed-off-by: Tanaka Akira Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/sisusbvga/sisusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 0025847743f3..8b37a4b9839e 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3245,6 +3245,7 @@ static struct usb_device_id sisusb_table [] = { { USB_DEVICE(0x0711, 0x0902) }, { USB_DEVICE(0x0711, 0x0903) }, { USB_DEVICE(0x0711, 0x0918) }, + { USB_DEVICE(0x0711, 0x0920) }, { USB_DEVICE(0x182d, 0x021c) }, { USB_DEVICE(0x182d, 0x0269) }, { } From 1ebca9dad5abe8b2ed4dbd186cd657fb47c1f321 Mon Sep 17 00:00:00 2001 From: Richard Farina Date: Wed, 20 Jan 2010 16:42:33 -0500 Subject: [PATCH 493/640] USB: serial: add usbid for dell wwan card to sierra.c This patch adds support for Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port. I stole the name from lsusb, but my card does not have a GPS on it (at least not that I can make function). 
I'm sure the patch is whitespace damaged but the one line addition should be fairly straightforward nonetheless. Tested-by: Rick Farina Signed-off-by: Rick Farina Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/sierra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index ac1b6449fb6a..3eb6143bb646 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -298,6 +298,7 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, + { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */ { } }; From 1a02d59aba9b61b820517fb135086471c065b573 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Jan 2010 17:09:34 +0300 Subject: [PATCH 494/640] kfifo: Make kfifo_initialized work after kfifo_free After kfifo rework it's no longer possible to reliably know if kfifo is usable, since after kfifo_free(), kfifo_initialized() would still return true. The correct behaviour is needed for at least FHCI USB driver. This patch fixes the issue by resetting the kfifo to zero values (the same approach is used in kfifo_alloc() if allocation failed). 
Signed-off-by: Anton Vorontsov Acked-by: Stefani Seibold Signed-off-by: Greg Kroah-Hartman --- kernel/kfifo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 498cabba225e..559fb5582b60 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc); void kfifo_free(struct kfifo *fifo) { kfree(fifo->buffer); + _kfifo_init(fifo, NULL, 0); } EXPORT_SYMBOL(kfifo_free); From 4c743d0ae60462e91465483dd87f4458d71af550 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Jan 2010 17:09:36 +0300 Subject: [PATCH 495/640] USB: FHCI: Fix build after kfifo rework After kfifo rework FHCI fails to build: CC drivers/usb/host/fhci-tds.o drivers/usb/host/fhci-tds.c: In function 'fhci_ep0_free': drivers/usb/host/fhci-tds.c:108: error: used struct type value where scalar is required drivers/usb/host/fhci-tds.c:118: error: used struct type value where scalar is required drivers/usb/host/fhci-tds.c:128: error: used struct type value where scalar is required This is because kfifos are no longer pointers in the ep struct. So, instead of checking the pointers, we should now check if kfifo is initialized. 
Reported-by: Josh Boyer Signed-off-by: Anton Vorontsov Acked-by: Stefani Seibold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fhci-tds.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c index d224ab467a40..e1232890c78b 100644 --- a/drivers/usb/host/fhci-tds.c +++ b/drivers/usb/host/fhci-tds.c @@ -105,7 +105,7 @@ void fhci_ep0_free(struct fhci_usb *usb) if (ep->td_base) cpm_muram_free(cpm_muram_offset(ep->td_base)); - if (ep->conf_frame_Q) { + if (kfifo_initialized(&ep->conf_frame_Q)) { size = cq_howmany(&ep->conf_frame_Q); for (; size; size--) { struct packet *pkt = cq_get(&ep->conf_frame_Q); @@ -115,7 +115,7 @@ void fhci_ep0_free(struct fhci_usb *usb) cq_delete(&ep->conf_frame_Q); } - if (ep->empty_frame_Q) { + if (kfifo_initialized(&ep->empty_frame_Q)) { size = cq_howmany(&ep->empty_frame_Q); for (; size; size--) { struct packet *pkt = cq_get(&ep->empty_frame_Q); @@ -125,7 +125,7 @@ void fhci_ep0_free(struct fhci_usb *usb) cq_delete(&ep->empty_frame_Q); } - if (ep->dummy_packets_Q) { + if (kfifo_initialized(&ep->dummy_packets_Q)) { size = cq_howmany(&ep->dummy_packets_Q); for (; size; size--) { u8 *buff = cq_get(&ep->dummy_packets_Q); From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Jan 2010 17:09:38 +0300 Subject: [PATCH 496/640] kfifo: Don't use integer as NULL pointer This patch fixes following sparse warnings: include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer Signed-off-by: Anton Vorontsov Acked-by: Stefani Seibold Signed-off-by: Greg Kroah-Hartman --- include/linux/kfifo.h | 2 +- kernel/kfifo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6f6c5f300af6..bc0fc795bd35 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -124,7 +124,7 @@ 
extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, */ static inline bool kfifo_initialized(struct kfifo *fifo) { - return fifo->buffer != 0; + return fifo->buffer != NULL; } /** diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 559fb5582b60..35edbe22e9a9 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0); + _kfifo_init(fifo, NULL, 0); return -ENOMEM; } From dbe4a99d846e565f0f99914cc82658cd9ce60bdc Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 22 Jan 2010 15:18:21 +0100 Subject: [PATCH 497/640] USB: g_multi: fix CONFIG_USB_G_MULTI_RNDIS usage g_multi used CONFIG_USB_ETH_RNDIS to check if RNDIS option was requested where it should check for CONFIG_USB_G_MULTI_RNDIS. As a result, RNDIS was never present in g_multi regardless of configuration. This fixes changes made in commit 396cda90d228d0851f3d64c7c85a1ecf6b8ae1e8. 
Signed-off-by: Michal Nazarewicz Cc: Marek Szyprowski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/multi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/multi.c b/drivers/usb/gadget/multi.c index 429560100b10..76496f5d272c 100644 --- a/drivers/usb/gadget/multi.c +++ b/drivers/usb/gadget/multi.c @@ -29,7 +29,7 @@ #if defined USB_ETH_RNDIS # undef USB_ETH_RNDIS #endif -#ifdef CONFIG_USB_ETH_RNDIS +#ifdef CONFIG_USB_G_MULTI_RNDIS # define USB_ETH_RNDIS y #endif From dd091c7b3280d4811b855d034fa91519fd3485ef Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Wed, 20 Jan 2010 20:06:31 +0100 Subject: [PATCH 498/640] USB: otg Kconfig: let USB_OTG_UTILS select USB_ULPI option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_USB_ULPI=y, CONFIG_USB<=m, CONFIG_PCI=n and CONFIG_USB_OTG_UTILS=n, which is the default used for mx31moboard, the build for all mx3 platforms fails because drivers/usb/otg/ulpi.c where otg_ulpi_create is defined is not compiled. Build error: arch/arm/mach-mx3/built-in.o: In function `mxc_board_init': kzmarm11.c:(.init.text+0x73c): undefined reference to `otg_ulpi_create' kzmarm11.c:(.init.text+0x1020): undefined reference to `otg_ulpi_create' This isn't a strong dependency as drivers/usb/otg/ulpi.c doesn't use functions defined in drivers/usb/otg/otg.o and is only needed to get ulpi.o linked into the kernel image. 
Signed-off-by: Valentin Longchamp Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig index de56b3d743d7..3d2d3e549bd1 100644 --- a/drivers/usb/otg/Kconfig +++ b/drivers/usb/otg/Kconfig @@ -44,6 +44,7 @@ config ISP1301_OMAP config USB_ULPI bool "Generic ULPI Transceiver Driver" depends on ARM + select USB_OTG_UTILS help Enable this to support ULPI connected USB OTG transceivers which are likely found on embedded boards. From 31e5d4abceaa3d11ff583ddf76ec292e90eacb7d Mon Sep 17 00:00:00 2001 From: Brian Niebuhr Date: Mon, 25 Jan 2010 14:45:40 -0600 Subject: [PATCH 499/640] USB: gadget: fix EEM gadget CRC usage eem_wrap() is sending a sentinel CRC, but it didn't indicate that to the host, it should zero bit 14 (bmCRC) in the EEM packet header, instead of setting it. Also remove a redundant crc calculation in eem_unwrap(). Signed-off-by: Steve Longerbeam Acked-by: Brian Niebuhr Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/f_eem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c index 0a577d5694fd..d4f0db58a8ad 100644 --- a/drivers/usb/gadget/f_eem.c +++ b/drivers/usb/gadget/f_eem.c @@ -358,7 +358,7 @@ done: * b15: bmType (0 == data) */ len = skb->len; - put_unaligned_le16((len & 0x3FFF) | BIT(14), skb_push(skb, 2)); + put_unaligned_le16(len & 0x3FFF, skb_push(skb, 2)); /* add a zero-length EEM packet, if needed */ if (padlen) @@ -464,7 +464,6 @@ static int eem_unwrap(struct gether *port, } /* validate CRC */ - crc = get_unaligned_le32(skb->data + len - ETH_FCS_LEN); if (header & BIT(14)) { crc = get_unaligned_le32(skb->data + len - ETH_FCS_LEN); From 7c0ff870d1ed287504a61ed865f3d728c757436b Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 3 Feb 2010 23:13:24 -0800 Subject: [PATCH 500/640] sysfs: sysfs_sd_setattr set iattrs unconditionally There is currently a bug in sysfs_sd_setattr inherited from sysfs_setattr in 2.6.32 where the first time we set the attributes on a sysfs file we allocate backing store but do not set the backing store attributes. Resulting in overly restrictive permissions on sysfs files. The fix is to simply modify the code so that it always executes when we update the sysfs attributes, as we did in 2.6.31 and earlier. Signed-off-by: Eric W. Biederman Tested-by: Jean Delvare Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 220b758523ae..6a06a1d1ea7b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -81,24 +81,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr) if (!sd_attrs) return -ENOMEM; sd->s_iattr = sd_attrs; - } else { - /* attributes were changed at least once in past */ - iattrs = &sd_attrs->ia_iattr; + } + /* attributes were changed at least once in past */ + iattrs = &sd_attrs->ia_iattr; - if (ia_valid & ATTR_UID) - iattrs->ia_uid = iattr->ia_uid; - if (ia_valid & ATTR_GID) - iattrs->ia_gid = iattr->ia_gid; - if (ia_valid & ATTR_ATIME) - iattrs->ia_atime = iattr->ia_atime; - if (ia_valid & ATTR_MTIME) - iattrs->ia_mtime = iattr->ia_mtime; - if (ia_valid & ATTR_CTIME) - iattrs->ia_ctime = iattr->ia_ctime; - if (ia_valid & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - iattrs->ia_mode = sd->s_mode = mode; - } + if (ia_valid & ATTR_UID) + iattrs->ia_uid = iattr->ia_uid; + if (ia_valid & ATTR_GID) + iattrs->ia_gid = iattr->ia_gid; + if (ia_valid & ATTR_ATIME) + iattrs->ia_atime = iattr->ia_atime; + if (ia_valid & ATTR_MTIME) + iattrs->ia_mtime = iattr->ia_mtime; + if (ia_valid & ATTR_CTIME) + iattrs->ia_ctime = iattr->ia_ctime; + if (ia_valid & ATTR_MODE) { + 
umode_t mode = iattr->ia_mode; + iattrs->ia_mode = sd->s_mode = mode; } return 0; } From 18d19c96457d172d913510c083bc7411ed40cb10 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 10 Feb 2010 13:32:49 +0100 Subject: [PATCH 501/640] class: Free the class private data in class_release Fix a memory leak by freeing the memory allocated in __class_register for the class private data. Signed-off-by: Laurent Pinchart Acked-by: Artem Bityutskiy Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/class.c b/drivers/base/class.c index 161746deab4b..6e2c3b064f53 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -59,6 +59,8 @@ static void class_release(struct kobject *kobj) else pr_debug("class '%s' does not have a release() function, " "be careful\n", class->name); + + kfree(cp); } static struct sysfs_ops class_sysfs_ops = { From bca476139d2ded86be146dae09b06e22548b67f3 Mon Sep 17 00:00:00 2001 From: Dick Hollenbeck Date: Wed, 9 Dec 2009 12:31:34 -0800 Subject: [PATCH 502/640] serial: 8250: add serial transmitter fully empty test When controlling an industrial radio modem it can be necessary to manipulate the handshake lines in order to control the radio modem's transmitter, from userspace. The transmitter should not be turned off before all characters have been transmitted. serial8250_tx_empty() was reporting that all characters were transmitted before they actually were. === Discovered in parallel with more testing and analysis by Kees Schoenmakers as follows: I ran into an NetMos 9835 serial pci board which behaves a little different than the standard. This type of expansion board is very common. "Standard" 8250 compatible devices clear the 'UART_LST_TEMT" bit together with the "UART_LSR_THRE" bit when writing data to the device. The NetMos device does it slightly different I believe that the TEMT bit is coupled to the shift register. 
The problem is that after writing data to the device and very quickly after that one does call serial8250_tx_empty, it returns the wrong information. My patch makes the test more robust (and solves the problem) and it does not affect the already correct devices. Alan: We may yet need to quirk this but now we know which chips we have a way to do that should we find this breaks some other 8250 clone with dodgy THRE. Signed-off-by: Dick Hollenbeck Signed-off-by: Alan Cox Cc: Kees Schoenmakers Signed-off-by: Andrew Morton Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/serial/8250.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index c3e37c8e7e26..e9b15c3746fa 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -83,6 +83,9 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */ #define PASS_LIMIT 256 +#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) + + /* * We default to IRQ0 for the "no irq" hack. Some * machine types want others as well - they're free @@ -1792,7 +1795,7 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; spin_unlock_irqrestore(&up->port.lock, flags); - return lsr & UART_LSR_TEMT ? TIOCSER_TEMT : 0; + return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0; } static unsigned int serial8250_get_mctrl(struct uart_port *port) @@ -1850,8 +1853,6 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) spin_unlock_irqrestore(&up->port.lock, flags); } -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - /* * Wait for transmitter & holding register to empty */ From fee099b278894a1c7383a08cb3c62a5b62a134e8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 12 Jan 2010 22:48:00 -0600 Subject: [PATCH 503/640] [SCSI] iscsi_tcp regression: remove bogus warn on in write path An empty r2tqueue is a valid state. 
It just means that we have processed all that there was to do. This patch removes the WARN_ON that was added when the kfifo changes were merged. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi_tcp.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index db6856c138fc..4ad87fd74ddd 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -992,12 +992,10 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) if (r2t == NULL) { if (kfifo_out(&tcp_task->r2tqueue, (void *)&tcp_task->r2t, sizeof(void *)) != - sizeof(void *)) { - WARN_ONCE(1, "unexpected fifo state"); + sizeof(void *)) r2t = NULL; - } - - r2t = tcp_task->r2t; + else + r2t = tcp_task->r2t; } spin_unlock_bh(&session->lock); } From 10897ae71dd6e205969726e0f817f3327ef32f83 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Thu, 21 Jan 2010 10:15:44 -0800 Subject: [PATCH 504/640] [SCSI] libfc: call ddp setup for only FCP reads to avoid accessing junk fsp pointer Adds check to call fc_fcp_ddp_setup for only FCP read cmds to avoid accessing junk fsp pointer at least in ESX since non FCP frame had junk fsp value, though fsp is implicitly initialized to null by __alloc_skb but with this patch no more relying on fsp initialized to null value and hitting junk fsp ptr access. Removes fsp pointer checking in fc_fcp_ddp_setup as this is not needed any more since its only caller for FCP read will always have a valid fsp. 
Reported by: Frank Zhang Reported by: Rob Love Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 2 +- drivers/scsi/libfc/fc_fcp.c | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 19d711cb938c..7f4364770e4a 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1890,7 +1890,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport, fc_exch_setup_hdr(ep, fp, ep->f_ctl); sp->cnt++; - if (ep->xid <= lport->lro_xid) + if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) fc_fcp_ddp_setup(fr_fsp(fp), ep->xid); if (unlikely(lport->tt.frame_send(lport, fp))) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 881d5dfe8c74..6fde2fabfd9b 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -298,9 +298,6 @@ void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid) { struct fc_lport *lport; - if (!fsp) - return; - lport = fsp->lp; if ((fsp->req_flags & FC_SRB_READ) && (lport->lro_enabled) && (lport->tt.ddp_setup)) { From 3b709150b73205710d05128b925090aac048ed23 Mon Sep 17 00:00:00 2001 From: Hugh Daschbach Date: Thu, 21 Jan 2010 10:15:49 -0800 Subject: [PATCH 505/640] [SCSI] libfc: Fix e_d_tov ns -> ms scaling factor in PLOGI response. Both PLOGI and RTV response processing conditionally scale e_d_tov, but use different scaling factors. The scaling factor is correct in RTV response processing. Bring PLOGI e_d_tov scaling in line with RTV common service parameter inspection. 
Signed-off-by: Hugh Daschbach Acked-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_rport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 02300523b234..97923bb07765 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -623,7 +623,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp, tov = ntohl(plp->fl_csp.sp_e_d_tov); if (ntohs(plp->fl_csp.sp_features) & FC_SP_FT_EDTR) - tov /= 1000; + tov /= 1000000; if (tov > rdata->e_d_tov) rdata->e_d_tov = tov; csp_seq = ntohs(plp->fl_csp.sp_tot_seq); From b248df30fca3aeee1d650b570e8cbc4e8cc45710 Mon Sep 17 00:00:00 2001 From: Hugh Daschbach Date: Thu, 21 Jan 2010 10:15:55 -0800 Subject: [PATCH 506/640] [SCSI] libfc: Don't assume response request present. Fix NULL pointer dereference crash occurs in fc_lport_bsg_request() for bsg requests that do not contain a response request. Specifically, FC_BSG_HST_ADD_RPORT and FC_BSG_HST_DEL_RPORT bsg requests are not guaranteed to include a response request. Signed-off-by: Hugh Daschbach Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_lport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 0b165024a219..7ec8ce75007c 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1800,7 +1800,8 @@ int fc_lport_bsg_request(struct fc_bsg_job *job) u32 did; job->reply->reply_payload_rcv_len = 0; - rsp->resid_len = job->reply_payload.payload_len; + if (rsp) + rsp->resid_len = job->reply_payload.payload_len; mutex_lock(&lport->lp_mutex); From f47dd855d9e64a5d499a93e858a82bc5e7b21345 Mon Sep 17 00:00:00 2001 From: Bhanu Prakash Gollapudi Date: Thu, 21 Jan 2010 10:16:00 -0800 Subject: [PATCH 507/640] [SCSI] libfcoe: Send port LKA every FIP_VN_KA_PERIOD secs. 
libfcoe module doesn't send port keep alive every FIP_VN_KA_PERIOD due to improper assignment of timeout value. Update the port_ka_time appropriately by incrementing it by FIP_VN_KA_PERIOD in fcoe_ctlr_timeout(), so that the link_work is scheduled to send the port LKA. Signed-off-by: Bhanu Gollapudi Acked-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/libfcoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 9823291395ad..511cb6b371ee 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -1187,7 +1187,7 @@ static void fcoe_ctlr_timeout(unsigned long arg) next_timer = fip->ctlr_ka_time; if (time_after_eq(jiffies, fip->port_ka_time)) { - fip->port_ka_time += jiffies + + fip->port_ka_time = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD); fip->send_port_ka = 1; } From 6409ea65b3b81ef693cbbc7c4b2300e50a4219dd Mon Sep 17 00:00:00 2001 From: Rob Love Date: Thu, 21 Jan 2010 10:16:05 -0800 Subject: [PATCH 508/640] [SCSI] fcoe: Only rmmod fcoe.ko if there are no active connections Currently we're gracefully tearing down each active connection when fcoe.ko is removed. We shouldn't allow the user to destroy connections by removing the module. We should force the user to destroy each connection and then the module can be removed. This patch makes it so a reference count on the module is taken each time a fcoe_interface is created. The reference count is dropped when the fcoe_interface is destroyed. This makes it so that module_exit() doesn't get called unless all fcoe_interfaces have been destroyed. This patch leaves the removal of interfaces in the module_exit routine so that if the user does a 'rmmod -f' we'll clean everything up before removing the module. The module_put line was put before the out_putdev goto line because we should only be decrementing the reference count if a fcoe_interface is actually destroyed.
If we can't find the netdev or the fcoe_interface then it's assumed that something else has destroyed the fcoe_interface and it would have decremented the reference count at that time. Signed-off-by: Robert Love Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 10be9f36a4cc..2f47ae7cce91 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2009,6 +2009,8 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp) fcoe_interface_cleanup(fcoe); rtnl_unlock(); fcoe_if_destroy(fcoe->ctlr.lp); + module_put(THIS_MODULE); + out_putdev: dev_put(netdev); out_nodev: @@ -2059,6 +2061,11 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp) } #endif + if (!try_module_get(THIS_MODULE)) { + rc = -EINVAL; + goto out_nomod; + } + rtnl_lock(); netdev = fcoe_if_to_netdev(buffer); if (!netdev) { @@ -2099,17 +2106,24 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp) if (!fcoe_link_ok(lport)) fcoe_ctlr_link_up(&fcoe->ctlr); - rc = 0; -out_free: /* * Release from init in fcoe_interface_create(), on success lport * should be holding a reference taken in fcoe_if_create(). */ fcoe_interface_put(fcoe); + dev_put(netdev); + rtnl_unlock(); + mutex_unlock(&fcoe_config_mutex); + + return 0; +out_free: + fcoe_interface_put(fcoe); out_putdev: dev_put(netdev); out_nodev: rtnl_unlock(); + module_put(THIS_MODULE); +out_nomod: mutex_unlock(&fcoe_config_mutex); return rc; } From b72c7d543589736d43da531566490dd31572f5ca Mon Sep 17 00:00:00 2001 From: Ranjith Lohithakshan Date: Wed, 17 Feb 2010 17:17:01 +0000 Subject: [PATCH 509/640] omap: Remove DEBUG_FS dependency for mux name checking The check for a valid mux name should be performed regardless of whether DEBUG_FS is enabled or not. 
Otherwise without DEBUG_FS, we get: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] last sysfs file: Modules linked in: CPU: 0 Not tainted (2.6.33-rc8 #10) PC is at strcmp+0x18/0x40 LR is at omap_mux_init_signal+0x68/0x14c ... This fixes the issue currently seen with boards not booting up if DEBUG_FS is not enabled in defconfig. Note that the earlier ifndef + ifdef now becomes simpler ifdef else: If CONFIG_OMAP_MUX is selected, we use pin names. If it's not selected, we only want the GPIO to mux register mapping. Signed-off-by: Ranjith Lohithakshan Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mux.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 5fedc50c58e4..5fef73f4743d 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -961,16 +961,14 @@ static void __init omap_mux_init_list(struct omap_mux *superset) while (superset->reg_offset != OMAP_MUX_TERMINATOR) { struct omap_mux *entry; -#ifndef CONFIG_OMAP_MUX - /* Skip pins that are not muxed as GPIO by bootloader */ - if (!OMAP_MODE_GPIO(omap_mux_read(superset->reg_offset))) { +#ifdef CONFIG_OMAP_MUX + if (!superset->muxnames || !superset->muxnames[0]) { superset++; continue; } -#endif - -#if defined(CONFIG_OMAP_MUX) && defined(CONFIG_DEBUG_FS) - if (!superset->muxnames || !superset->muxnames[0]) { +#else + /* Skip pins that are not muxed as GPIO by bootloader */ + if (!OMAP_MODE_GPIO(omap_mux_read(superset->reg_offset))) { superset++; continue; } From 172d2d0041fdd4f3617dbdff8296bf279db3d5fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Feb 2010 16:42:08 -0800 Subject: [PATCH 510/640] sparc64: Sync of_create_pci_dev() with drivers/pci/probe.c changes. 
Mirrors powerpc commits bb209c8287d2d55ec4a67e3933346e0a3ee0da76 ("powerpc/pci: Add calls to set_pcie_port_type() and set_pcie_hotplug_bridge()") and 26b4a0ca46985ae9586c194f7859f3838b1230f8 ("powerpc/pci: Add missing hookup to pci_slot") We also need to initialize ->dma_mask explicitly here too. Signed-off-by: David S. Miller --- arch/sparc/kernel/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 539e83f8e087..592b03d85167 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -247,6 +247,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct pci_bus *bus, int devfn) { struct dev_archdata *sd; + struct pci_slot *slot; struct of_device *op; struct pci_dev *dev; const char *type; @@ -286,6 +287,11 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->dev.bus = &pci_bus_type; dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? */ + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff); dev->device = of_getintprop_default(node, "device-id", 0xffff); @@ -322,6 +328,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->current_state = 4; /* unknown power state */ dev->error_state = pci_channel_io_normal; + dev->dma_mask = 0xffffffff; if (!strcmp(node->name, "pci")) { /* a PCI-PCI bridge */ From d7ecfb3c2aa155c9f6152ebe91de92067d16ba6e Mon Sep 17 00:00:00 2001 From: Kristoffer Glembo Date: Mon, 15 Feb 2010 16:10:28 +0100 Subject: [PATCH 511/640] sparc: Fix incorrect comparison in of_bus_ambapp_match() Use type instead of name in comparison. Signed-off-by: Kristoffer Glembo Signed-off-by: David S. 
Miller --- arch/sparc/kernel/of_device_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 4c26eb59e742..53a58b349849 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -105,7 +105,7 @@ static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags) static int of_bus_ambapp_match(struct device_node *np) { - return !strcmp(np->name, "ambapp"); + return !strcmp(np->type, "ambapp"); } static void of_bus_ambapp_count_cells(struct device_node *child, From 01d4503968f471f876fb44335800d2cf8dc5a2ce Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 31 Jan 2010 07:07:14 +1000 Subject: [PATCH 512/640] drm/radeon/kms: use udelay for short delays For usec delays use udelay instead of scheduling, this should allow reclocking to happen faster. This also was the cause of reported 33s delays at bootup on certain systems. fixes: freedesktop.org bug 25506 Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index 2a3df5599ab4..7f152f66f196 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -643,7 +643,7 @@ static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg) uint8_t count = U8((*ptr)++); SDEBUG(" count: %d\n", count); if (arg == ATOM_UNIT_MICROSEC) - schedule_timeout_uninterruptible(usecs_to_jiffies(count)); + udelay(count); else schedule_timeout_uninterruptible(msecs_to_jiffies(count)); } From f2d12b8e2c05e86b1a2070efcc07f1b8a79afb4c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 15 Feb 2010 14:45:22 +0000 Subject: [PATCH 513/640] drm/vmwgfx: Use fb handover mechanism instead of stealth mode. 
When the vmwgfx module is loaded on top of vesafb, it would operate in stealth mode in parallel with vesafb, evicting VRAM on dropmaster. Change that to use the vesafb handover mechanism, like other drmfb drivers. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 49 ++++++++++------------------- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 3 ++ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index a6e8f687fa64..0c9c0811f42d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -348,22 +348,19 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) */ DRM_INFO("It appears like vesafb is loaded. " - "Ignore above error if any. Entering stealth mode.\n"); + "Ignore above error if any.\n"); ret = pci_request_region(dev->pdev, 2, "vmwgfx stealth probe"); if (unlikely(ret != 0)) { DRM_ERROR("Failed reserving the SVGA MMIO resource.\n"); goto out_no_device; } - vmw_kms_init(dev_priv); - vmw_overlay_init(dev_priv); - } else { - ret = vmw_request_device(dev_priv); - if (unlikely(ret != 0)) - goto out_no_device; - vmw_kms_init(dev_priv); - vmw_overlay_init(dev_priv); - vmw_fb_init(dev_priv); } + ret = vmw_request_device(dev_priv); + if (unlikely(ret != 0)) + goto out_no_device; + vmw_kms_init(dev_priv); + vmw_overlay_init(dev_priv); + vmw_fb_init(dev_priv); dev_priv->pm_nb.notifier_call = vmwgfx_pm_notifier; register_pm_notifier(&dev_priv->pm_nb); @@ -406,17 +403,15 @@ static int vmw_driver_unload(struct drm_device *dev) unregister_pm_notifier(&dev_priv->pm_nb); - if (!dev_priv->stealth) { - vmw_fb_close(dev_priv); - vmw_kms_close(dev_priv); - vmw_overlay_close(dev_priv); - vmw_release_device(dev_priv); - pci_release_regions(dev->pdev); - } else { - vmw_kms_close(dev_priv); - vmw_overlay_close(dev_priv); + vmw_fb_close(dev_priv); + vmw_kms_close(dev_priv); + 
vmw_overlay_close(dev_priv); + vmw_release_device(dev_priv); + if (dev_priv->stealth) pci_release_region(dev->pdev, 2); - } + else + pci_release_regions(dev->pdev); + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) drm_irq_uninstall(dev_priv->dev); if (dev->devname == vmw_devname) @@ -585,11 +580,6 @@ static int vmw_master_set(struct drm_device *dev, int ret = 0; DRM_INFO("Master set.\n"); - if (dev_priv->stealth) { - ret = vmw_request_device(dev_priv); - if (unlikely(ret != 0)) - return ret; - } if (active) { BUG_ON(active != &dev_priv->fbdev_master); @@ -649,18 +639,11 @@ static void vmw_master_drop(struct drm_device *dev, ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); - if (dev_priv->stealth) { - ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM); - if (unlikely(ret != 0)) - DRM_ERROR("Unable to clean VRAM on master drop.\n"); - vmw_release_device(dev_priv); - } dev_priv->active_master = &dev_priv->fbdev_master; ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); ttm_vt_unlock(&dev_priv->fbdev_master.lock); - if (!dev_priv->stealth) - vmw_fb_on(dev_priv); + vmw_fb_on(dev_priv); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 4f4f6432be8b..a93367041cdc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -559,6 +559,9 @@ int vmw_fb_init(struct vmw_private *vmw_priv) info->pixmap.scan_align = 1; #endif + info->aperture_base = vmw_priv->vram_start; + info->aperture_size = vmw_priv->vram_size; + /* * Dirty & Deferred IO */ From b58db2c6dd18d35f59862d3352c86a0a58838bf3 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 15 Feb 2010 22:15:39 +0000 Subject: [PATCH 514/640] drm/edid: Fix interlaced detailed timings to be frame size, not field. cf. 
https://bugzilla.redhat.com/show_bug.cgi?id=533561 Signed-off-by: Adam Jackson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 47 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f665b05592f3..ab6c97330412 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -598,6 +598,50 @@ struct drm_display_mode *drm_mode_std(struct drm_device *dev, return mode; } +/* + * EDID is delightfully ambiguous about how interlaced modes are to be + * encoded. Our internal representation is of frame height, but some + * HDTV detailed timings are encoded as field height. + * + * The format list here is from CEA, in frame size. Technically we + * should be checking refresh rate too. Whatever. + */ +static void +drm_mode_do_interlace_quirk(struct drm_display_mode *mode, + struct detailed_pixel_timing *pt) +{ + int i; + static const struct { + int w, h; + } cea_interlaced[] = { + { 1920, 1080 }, + { 720, 480 }, + { 1440, 480 }, + { 2880, 480 }, + { 720, 576 }, + { 1440, 576 }, + { 2880, 576 }, + }; + static const int n_sizes = + sizeof(cea_interlaced)/sizeof(cea_interlaced[0]); + + if (!(pt->misc & DRM_EDID_PT_INTERLACED)) + return; + + for (i = 0; i < n_sizes; i++) { + if ((mode->hdisplay == cea_interlaced[i].w) && + (mode->vdisplay == cea_interlaced[i].h / 2)) { + mode->vdisplay *= 2; + mode->vsync_start *= 2; + mode->vsync_end *= 2; + mode->vtotal *= 2; + mode->vtotal |= 1; + } + } + + mode->flags |= DRM_MODE_FLAG_INTERLACE; +} + /** * drm_mode_detailed - create a new mode from an EDID detailed timing section * @dev: DRM device (needed to create new mode) @@ -680,8 +724,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, drm_mode_set_name(mode); - if (pt->misc & DRM_EDID_PT_INTERLACED) - mode->flags |= DRM_MODE_FLAG_INTERLACE; + drm_mode_do_interlace_quirk(mode, pt); if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) { 
pt->misc |= DRM_EDID_PT_HSYNC_POSITIVE | DRM_EDID_PT_VSYNC_POSITIVE; From 91cb91becf372b5308cdd7d2e15b2e3ef66bae7e Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 15 Feb 2010 21:36:13 +0100 Subject: [PATCH 515/640] drm/radeon/kms: fix indirect buffer management V2 There are 3 distinct states for an indirect buffer (IB) : 1- free with no fence 2- free with a fence 3- non free (fence doesn't matter) Previous code mixed case 2 & 3 in a single one leading to possible catastrophic failure. This patch reworks the handling and properly separates each case. So when you get ib we set the ib as non free and fence status doesn't matter. The fence becomes active (i.e. has a meaning for the ib code) once the ib is scheduled or free. This patch also gets rid of the alloc bitmap as it was overkill, we now go through the IB pool list like in a ring buffer as the oldest IB is the first one that will be free. Fix : https://bugs.freedesktop.org/show_bug.cgi?id=26438 and likely other bugs. V2: remove the scheduled list, it's useless now, fix free ib scanning Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_blit_kms.c | 3 - drivers/gpu/drm/radeon/radeon.h | 9 ++- drivers/gpu/drm/radeon/radeon_ring.c | 105 ++++++++++--------------- 3 files changed, 45 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index af1c3ca8a4cb..446b765ac72a 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -543,9 +543,6 @@ int r600_vb_ib_get(struct radeon_device *rdev) void r600_vb_ib_put(struct radeon_device *rdev) { radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); - mutex_lock(&rdev->ib_pool.mutex); - list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs); - mutex_unlock(&rdev->ib_pool.mutex); radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f57480ba1355..c0356bb193e5 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -96,6 +96,7 @@ extern int radeon_audio; * symbol; */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ +/* RADEON_IB_POOL_SIZE must be a power of 2 */ #define RADEON_IB_POOL_SIZE 16 #define RADEON_DEBUGFS_MAX_NUM_FILES 32 #define RADEONFB_CONN_LIMIT 4 @@ -363,11 +364,12 @@ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev); */ struct radeon_ib { struct list_head list; - unsigned long idx; + unsigned idx; uint64_t gpu_addr; struct radeon_fence *fence; - uint32_t *ptr; + uint32_t *ptr; uint32_t length_dw; + bool free; }; /* @@ -377,10 +379,9 @@ struct radeon_ib { struct radeon_ib_pool { struct mutex mutex; struct radeon_bo *robj; - struct list_head scheduled_ibs; struct radeon_ib ibs[RADEON_IB_POOL_SIZE]; bool ready; - DECLARE_BITMAP(alloc_bm, RADEON_IB_POOL_SIZE); + unsigned head_id; }; struct radeon_cp { diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 4d12b2d17b4d..694799f6fac1 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -41,68 +41,55 @@ int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib) { struct radeon_fence *fence; struct radeon_ib *nib; - unsigned long i; - int r = 0; + int r = 0, i, c; *ib = NULL; r = radeon_fence_create(rdev, &fence); if (r) { - DRM_ERROR("failed to create fence for new IB\n"); + dev_err(rdev->dev, "failed to create fence for new IB\n"); return r; } mutex_lock(&rdev->ib_pool.mutex); - i = find_first_zero_bit(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE); - if (i < RADEON_IB_POOL_SIZE) { - set_bit(i, rdev->ib_pool.alloc_bm); - rdev->ib_pool.ibs[i].length_dw = 0; - *ib = &rdev->ib_pool.ibs[i]; - mutex_unlock(&rdev->ib_pool.mutex); - goto out; + for (i = rdev->ib_pool.head_id, c = 0, nib = NULL; c < RADEON_IB_POOL_SIZE; c++, i++) { + i &= (RADEON_IB_POOL_SIZE - 1); + if (rdev->ib_pool.ibs[i].free) { + nib 
= &rdev->ib_pool.ibs[i]; + break; + } } - if (list_empty(&rdev->ib_pool.scheduled_ibs)) { - /* we go do nothings here */ + if (nib == NULL) { + /* This should never happen, it means we allocated all + * IB and haven't scheduled one yet, return EBUSY to + * userspace hoping that on ioctl recall we get better + * luck + */ + dev_err(rdev->dev, "no free indirect buffer !\n"); mutex_unlock(&rdev->ib_pool.mutex); - DRM_ERROR("all IB allocated none scheduled.\n"); - r = -EINVAL; - goto out; + radeon_fence_unref(&fence); + return -EBUSY; } - /* get the first ib on the scheduled list */ - nib = list_entry(rdev->ib_pool.scheduled_ibs.next, - struct radeon_ib, list); - if (nib->fence == NULL) { - /* we go do nothings here */ + rdev->ib_pool.head_id = (nib->idx + 1) & (RADEON_IB_POOL_SIZE - 1); + nib->free = false; + if (nib->fence) { mutex_unlock(&rdev->ib_pool.mutex); - DRM_ERROR("IB %lu scheduled without a fence.\n", nib->idx); - r = -EINVAL; - goto out; - } - mutex_unlock(&rdev->ib_pool.mutex); - - r = radeon_fence_wait(nib->fence, false); - if (r) { - DRM_ERROR("radeon: IB(%lu:0x%016lX:%u)\n", nib->idx, - (unsigned long)nib->gpu_addr, nib->length_dw); - DRM_ERROR("radeon: GPU lockup detected, fail to get a IB\n"); - goto out; + r = radeon_fence_wait(nib->fence, false); + if (r) { + dev_err(rdev->dev, "error waiting fence of IB(%u:0x%016lX:%u)\n", + nib->idx, (unsigned long)nib->gpu_addr, nib->length_dw); + mutex_lock(&rdev->ib_pool.mutex); + nib->free = true; + mutex_unlock(&rdev->ib_pool.mutex); + radeon_fence_unref(&fence); + return r; + } + mutex_lock(&rdev->ib_pool.mutex); } radeon_fence_unref(&nib->fence); - + nib->fence = fence; nib->length_dw = 0; - - /* scheduled list is accessed here */ - mutex_lock(&rdev->ib_pool.mutex); - list_del(&nib->list); - INIT_LIST_HEAD(&nib->list); mutex_unlock(&rdev->ib_pool.mutex); - *ib = nib; -out: - if (r) { - radeon_fence_unref(&fence); - } else { - (*ib)->fence = fence; - } - return r; + return 0; } void radeon_ib_free(struct 
radeon_device *rdev, struct radeon_ib **ib) @@ -114,18 +101,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) return; } mutex_lock(&rdev->ib_pool.mutex); - if (!list_empty(&tmp->list) && !radeon_fence_signaled(tmp->fence)) { - /* IB is scheduled & not signaled don't do anythings */ - mutex_unlock(&rdev->ib_pool.mutex); - return; - } - list_del(&tmp->list); - INIT_LIST_HEAD(&tmp->list); - if (tmp->fence) - radeon_fence_unref(&tmp->fence); - - tmp->length_dw = 0; - clear_bit(tmp->idx, rdev->ib_pool.alloc_bm); + tmp->free = true; mutex_unlock(&rdev->ib_pool.mutex); } @@ -135,7 +111,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) if (!ib->length_dw || !rdev->cp.ready) { /* TODO: Nothings in the ib we should report. */ - DRM_ERROR("radeon: couldn't schedule IB(%lu).\n", ib->idx); + DRM_ERROR("radeon: couldn't schedule IB(%u).\n", ib->idx); return -EINVAL; } @@ -148,7 +124,8 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_ib_execute(rdev, ib); radeon_fence_emit(rdev, ib->fence); mutex_lock(&rdev->ib_pool.mutex); - list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs); + /* once scheduled IB is considered free and protected by the fence */ + ib->free = true; mutex_unlock(&rdev->ib_pool.mutex); radeon_ring_unlock_commit(rdev); return 0; @@ -164,7 +141,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev) if (rdev->ib_pool.robj) return 0; /* Allocate 1M object buffer */ - INIT_LIST_HEAD(&rdev->ib_pool.scheduled_ibs); r = radeon_bo_create(rdev, NULL, RADEON_IB_POOL_SIZE*64*1024, true, RADEON_GEM_DOMAIN_GTT, &rdev->ib_pool.robj); @@ -195,9 +171,9 @@ int radeon_ib_pool_init(struct radeon_device *rdev) rdev->ib_pool.ibs[i].ptr = ptr + offset; rdev->ib_pool.ibs[i].idx = i; rdev->ib_pool.ibs[i].length_dw = 0; - INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].list); + rdev->ib_pool.ibs[i].free = true; } - bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE); + rdev->ib_pool.head_id = 0; 
rdev->ib_pool.ready = true; DRM_INFO("radeon: ib pool ready.\n"); if (radeon_debugfs_ib_init(rdev)) { @@ -214,7 +190,6 @@ void radeon_ib_pool_fini(struct radeon_device *rdev) return; } mutex_lock(&rdev->ib_pool.mutex); - bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE); if (rdev->ib_pool.robj) { r = radeon_bo_reserve(rdev->ib_pool.robj, false); if (likely(r == 0)) { @@ -363,7 +338,7 @@ static int radeon_debugfs_ib_info(struct seq_file *m, void *data) if (ib == NULL) { return 0; } - seq_printf(m, "IB %04lu\n", ib->idx); + seq_printf(m, "IB %04u\n", ib->idx); seq_printf(m, "IB fence %p\n", ib->fence); seq_printf(m, "IB size %05u dwords\n", ib->length_dw); for (i = 0; i < ib->length_dw; i++) { From 94429bb6c8343722544e282d89dc4638672e49b4 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 15 Feb 2010 21:36:33 +0100 Subject: [PATCH 516/640] drm/radeon/kms: fix bo's fence association Previous code did associate fence to bo before the fence was emited and it also didn't lock protected access to ttm sync_obj member. Both of this flaw leads to possible race between different code path. This patch fix this by associating fence only once the fence is emitted and properly lock protect access to sync_obj member of ttm. 
Fix: https://bugs.freedesktop.org/show_bug.cgi?id=26438 and likely similar others bugs Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_cs.c | 10 +++----- drivers/gpu/drm/radeon/radeon_object.c | 34 +++++++++++--------------- drivers/gpu/drm/radeon/radeon_object.h | 4 +-- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 1190148cf5e6..e9d085021c1f 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -86,7 +86,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) &p->validated); } } - return radeon_bo_list_validate(&p->validated, p->ib->fence); + return radeon_bo_list_validate(&p->validated); } int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) @@ -189,12 +189,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (error && parser->ib) { - radeon_bo_list_unvalidate(&parser->validated, - parser->ib->fence); - } else { - radeon_bo_list_unreserve(&parser->validated); + if (!error && parser->ib) { + radeon_bo_list_fence(&parser->validated, parser->ib->fence); } + radeon_bo_list_unreserve(&parser->validated); for (i = 0; i < parser->nrelocs; i++) { if (parser->relocs[i].gobj) { mutex_lock(&parser->rdev->ddev->struct_mutex); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index d72a71bff218..f1da370928eb 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -306,11 +306,10 @@ void radeon_bo_list_unreserve(struct list_head *head) } } -int radeon_bo_list_validate(struct list_head *head, void *fence) +int radeon_bo_list_validate(struct list_head *head) { struct radeon_bo_list *lobj; struct radeon_bo *bo; - struct radeon_fence *old_fence = NULL; int r; r = radeon_bo_list_reserve(head); @@ -334,32 +333,27 @@ int radeon_bo_list_validate(struct 
list_head *head, void *fence) } lobj->gpu_offset = radeon_bo_gpu_offset(bo); lobj->tiling_flags = bo->tiling_flags; - if (fence) { - old_fence = (struct radeon_fence *)bo->tbo.sync_obj; - bo->tbo.sync_obj = radeon_fence_ref(fence); - bo->tbo.sync_obj_arg = NULL; - } - if (old_fence) { - radeon_fence_unref(&old_fence); - } } return 0; } -void radeon_bo_list_unvalidate(struct list_head *head, void *fence) +void radeon_bo_list_fence(struct list_head *head, void *fence) { struct radeon_bo_list *lobj; - struct radeon_fence *old_fence; + struct radeon_bo *bo; + struct radeon_fence *old_fence = NULL; - if (fence) - list_for_each_entry(lobj, head, list) { - old_fence = to_radeon_fence(lobj->bo->tbo.sync_obj); - if (old_fence == fence) { - lobj->bo->tbo.sync_obj = NULL; - radeon_fence_unref(&old_fence); - } + list_for_each_entry(lobj, head, list) { + bo = lobj->bo; + spin_lock(&bo->tbo.lock); + old_fence = (struct radeon_fence *)bo->tbo.sync_obj; + bo->tbo.sync_obj = radeon_fence_ref(fence); + bo->tbo.sync_obj_arg = NULL; + spin_unlock(&bo->tbo.lock); + if (old_fence) { + radeon_fence_unref(&old_fence); } - radeon_bo_list_unreserve(head); + } } int radeon_bo_fbdev_mmap(struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index a02f18011ad1..7ab43de1e244 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -156,8 +156,8 @@ extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head); extern int radeon_bo_list_reserve(struct list_head *head); extern void radeon_bo_list_unreserve(struct list_head *head); -extern int radeon_bo_list_validate(struct list_head *head, void *fence); -extern void radeon_bo_list_unvalidate(struct list_head *head, void *fence); +extern int radeon_bo_list_validate(struct list_head *head); +extern void radeon_bo_list_fence(struct list_head *head, void *fence); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, 
struct vm_area_struct *vma); extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, From 49bf83a45fc677db1ed44d0e072e6aaeabe4e124 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 16 Feb 2010 03:45:45 -0500 Subject: [PATCH 517/640] ACPI: fix "acpi=ht" boot option We broke "acpi=ht" in 2.6.32 by disabling MADT parsing for acpi=disabled. e5b8fc6ac158f65598f58dba2c0d52ba3b412f52 This also broke systems which invoked acpi=ht via DMI blacklist. acpi=ht is a really ugly hack, but restore it for those that still use it. http://bugzilla.kernel.org/show_bug.cgi?id=14886 Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 1 + drivers/acpi/tables.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 7ae58892ba8d..e97b255d97bc 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -94,6 +94,7 @@ ia64_acpi_release_global_lock (unsigned int *lock) #define acpi_noirq 0 /* ACPI always enabled on IA64 */ #define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */ #define acpi_strict 1 /* no ACPI spec workarounds on IA64 */ +#define acpi_ht 0 /* no HT-only mode on IA64 */ #endif #define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */ static inline void disable_acpi(void) { } diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index f336bca7c450..8a0ed2800e63 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -213,7 +213,7 @@ acpi_table_parse_entries(char *id, unsigned long table_end; acpi_size tbl_size; - if (acpi_disabled) + if (acpi_disabled && !acpi_ht) return -ENODEV; if (!handler) @@ -280,7 +280,7 @@ int __init acpi_table_parse(char *id, acpi_table_handler handler) struct acpi_table_header *table = NULL; acpi_size tbl_size; - if (acpi_disabled) + if (acpi_disabled && !acpi_ht) return -ENODEV; if (!handler) From c2d1a2a11b9b29c3be1dd781dc88518ffab8d4be Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 17 
Feb 2010 12:17:33 -0800 Subject: [PATCH 518/640] Input: i8042 - fix KBC jam during hibernate 633aae2 "Input: i8042 - switch to using dev_pm_ops" removed handling for PMSG_THAW, since we do not need to do anything during freeze and thus it was thought that thaw is not needed as well. However, there is a period when interrupts are kept off, and if key happens to be pressed during that time KBC becomes jammed. To avoid the jam we simply need to poll KBC once during thaw. Signed-off-by: Alan Jenkins Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index d84a36e545f6..b54aee7cd9e3 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1161,9 +1161,17 @@ static int i8042_pm_restore(struct device *dev) return 0; } +static int i8042_pm_thaw(struct device *dev) +{ + i8042_interrupt(0, NULL); + + return 0; +} + static const struct dev_pm_ops i8042_pm_ops = { .suspend = i8042_pm_reset, .resume = i8042_pm_restore, + .thaw = i8042_pm_thaw, .poweroff = i8042_pm_reset, .restore = i8042_pm_restore, }; From 3b77fd8ee6a8ae34e349651e9d5f5000d1cc206e Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 17 Feb 2010 12:21:45 -0800 Subject: [PATCH 519/640] Input: add KEY_RFKILL Most laptops have keys that are intended to toggle all device state, not just wifi. These are currently generally mapped to KEY_WLAN. As a result, rfkill will only kill or enable wifi in response to the key press. This confuses users and can make it difficult for them to enable bluetooth and wwan devices. This patch adds a new keycode, KEY_RFKILL. It indicates that the system should toggle the state of all rfkillable devices.
Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/input.h b/include/linux/input.h index 735ceaf1bc2d..663208afb64c 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -376,6 +376,7 @@ struct input_absinfo { #define KEY_DISPLAY_OFF 245 /* display device to off state */ #define KEY_WIMAX 246 +#define KEY_RFKILL 247 /* Key that controls all radios */ /* Range 248 - 255 is reserved for special needs of AT keyboard driver */ From 6c09f09d44690d341d970559b64779bef8b9236b Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 16 Feb 2010 07:57:43 +0100 Subject: [PATCH 520/640] ARM: 5938/1: ARM: L2: export outer_cache_fns The 'outer_cache' variable is needed by the outer_inv_range(), outer_clean_range() and outer_flush_range() functions, which are declared as inline in asm/cacheflush.h. Otherwise drivers built as a loadable module, which access these functions, will have an undefined symbol. Signed-off-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c6c57b640b6b..621acad8ea43 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -102,6 +102,7 @@ struct cpu_cache_fns cpu_cache; #endif #ifdef CONFIG_OUTER_CACHE struct outer_cache_fns outer_cache; +EXPORT_SYMBOL(outer_cache); #endif struct stack { From f8b55f251012e104093e105483c45c5d85ad3040 Mon Sep 17 00:00:00 2001 From: Christine Caulfield Date: Thu, 18 Feb 2010 11:33:13 +0000 Subject: [PATCH 521/640] Orphan DECnet Due to lack of time, space, motivation, hardware and probably expertise, I have reluctantly decided to orphan the DECnet code in the kernel. 
Judging by the deafening silence on the linux-decnet mailing list I suspect it's either not being used anyway, or the few people that are using it are happy with their older kernels. Signed-off-by: Christine Caulfield Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 412eff60c33d..8ed3d0a8b3f4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1733,10 +1733,9 @@ F: include/linux/tfrc.h F: net/dccp/ DECnet NETWORK LAYER -M: Christine Caulfield W: http://linux-decnet.sourceforge.net L: linux-decnet-user@lists.sourceforge.net -S: Maintained +S: Orphan F: Documentation/networking/decnet.txt F: net/decnet/ From b857df1acc634b18db1db2a40864af985100266e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 18 Feb 2010 18:07:18 +0100 Subject: [PATCH 522/640] ARM: 5944/1: scsi: fix timer setup in fas216.c mod_timer() takes an absolute time and not a delay as its argument. Cc: Signed-off-by: Guennadi Liakhovetski Signed-off-by: Russell King --- drivers/scsi/arm/fas216.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 477542602284..9e71ac611146 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -2516,7 +2516,7 @@ int fas216_eh_device_reset(struct scsi_cmnd *SCpnt) if (info->scsi.phase == PHASE_IDLE) fas216_kick(info); - mod_timer(&info->eh_timer, 30 * HZ); + mod_timer(&info->eh_timer, jiffies + 30 * HZ); spin_unlock_irqrestore(&info->host_lock, flags); /* From 079e1091a2901c81fc2d7ad2079344c3edab2c4d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 18 Feb 2010 21:54:11 +0200 Subject: [PATCH 523/640] Gemini: wrong registers used to set reg_level in gpio_set_irq_type() It appears the wrong GPIO registers were used Signed-off-by: Roel Kluin Signed-off-by: Paulius Zaleckas --- arch/arm/mach-gemini/gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c index e7263854bc7b..fe3bd5ac8b10 100644 --- a/arch/arm/mach-gemini/gpio.c +++ b/arch/arm/mach-gemini/gpio.c @@ -86,7 +86,7 @@ static int gpio_set_irq_type(unsigned int irq, unsigned int type) unsigned int reg_both, reg_level, reg_type; reg_type = __raw_readl(base + GPIO_INT_TYPE); - reg_level = __raw_readl(base + GPIO_INT_BOTH_EDGE); + reg_level = __raw_readl(base + GPIO_INT_LEVEL); reg_both = __raw_readl(base + GPIO_INT_BOTH_EDGE); switch (type) { @@ -117,7 +117,7 @@ static int gpio_set_irq_type(unsigned int irq, unsigned int type) } __raw_writel(reg_type, base + GPIO_INT_TYPE); - __raw_writel(reg_level, base + GPIO_INT_BOTH_EDGE); + __raw_writel(reg_level, base + GPIO_INT_LEVEL); __raw_writel(reg_both, base + GPIO_INT_BOTH_EDGE); gpio_ack_irq(irq); From 083c88fcf1a89986ffa160826f96509fb4b370bb Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Thu, 18 Feb 2010 21:54:12 +0200 Subject: [PATCH 524/640] MAINTAINERS: fix my e-mail and status for Gemini and FA526 Signed-off-by: Paulius Zaleckas --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 412eff60c33d..44c669d92a49 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -616,10 +616,10 @@ M: Richard Purdie S: Maintained ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE -M: Paulius Zaleckas +M: Paulius Zaleckas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://gitorious.org/linux-gemini/mainline.git -S: Maintained +S: Odd Fixes F: arch/arm/mach-gemini/ ARM/EBSA110 MACHINE SUPPORT @@ -641,9 +641,9 @@ T: topgit git://git.openezx.org/openezx.git F: arch/arm/mach-pxa/ezx.c ARM/FARADAY FA526 PORT -M: Paulius Zaleckas +M: Paulius Zaleckas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Odd Fixes F: arch/arm/mm/*-fa* ARM/FOOTBRIDGE ARCHITECTURE From c1db53b36633e6a7511dbec7c372f01a31528f0c Mon Sep 17 00:00:00 2001 From: Richard 
Guenther Date: Tue, 9 Feb 2010 20:16:03 -0300 Subject: [PATCH 525/640] V4L/DVB: dvb: l64781.ko broken with gcc 4.5 I'm trying to fix it on the GCC side (PR43007), but the module is quite stupid in using ULL constants to operate on u32 values: static int apply_frontend_param (struct dvb_frontend* fe, struct dvb_frontend_parameters *param) { ... static const u32 ppm = 8000; u32 spi_bias; ... spi_bias *= 1000ULL; spi_bias /= 1000ULL + ppm/1000; which causes current GCC 4.5 to emit calls to __udivdi3 for i?86 again. This patch fixes this issue. Signed-off-by: Richard Guenther Signed-off-by: Greg Kroah-Hartman CC: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/frontends/l64781.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb/frontends/l64781.c b/drivers/media/dvb/frontends/l64781.c index 3051b64aa17c..445fa1068064 100644 --- a/drivers/media/dvb/frontends/l64781.c +++ b/drivers/media/dvb/frontends/l64781.c @@ -192,8 +192,8 @@ static int apply_frontend_param (struct dvb_frontend* fe, struct dvb_frontend_pa spi_bias *= qam_tab[p->constellation]; spi_bias /= p->code_rate_HP + 1; spi_bias /= (guard_tab[p->guard_interval] + 32); - spi_bias *= 1000ULL; - spi_bias /= 1000ULL + ppm/1000; + spi_bias *= 1000; + spi_bias /= 1000 + ppm/1000; spi_bias *= p->code_rate_HP; val0x04 = (p->transmission_mode << 2) | p->guard_interval; From fc4a7f93087a48619005111895dcaa115f807399 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 10 Feb 2010 23:57:17 -0300 Subject: [PATCH 526/640] V4L/DVB: cxusb: Select all required frontend and tuner modules cxusb uses the atbm8830 and lgs8gxx (not lgs8gl5) frontends and the max2165 tuner, so it needs to select them. 
Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/dvb-usb/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig index 1b249897c9fb..465295b1d14b 100644 --- a/drivers/media/dvb/dvb-usb/Kconfig +++ b/drivers/media/dvb/dvb-usb/Kconfig @@ -112,11 +112,13 @@ config DVB_USB_CXUSB select DVB_MT352 if !DVB_FE_CUSTOMISE select DVB_ZL10353 if !DVB_FE_CUSTOMISE select DVB_DIB7000P if !DVB_FE_CUSTOMISE - select DVB_LGS8GL5 if !DVB_FE_CUSTOMISE select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE + select DVB_ATBM8830 if !DVB_FE_CUSTOMISE + select DVB_LGS8GXX if !DVB_FE_CUSTOMISE select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_MXL5005S if !MEDIA_TUNER_CUSTOMISE + select MEDIA_TUNER_MAX2165 if !MEDIA_TUNER_CUSTOMISE help Say Y here to support the Conexant USB2.0 hybrid reference design. Currently, only DVB and ATSC modes are supported, analog mode From 2b59125b1b5f8c9bb0524b8a0bdad4b780a239ac Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 2 Feb 2010 13:17:54 +0900 Subject: [PATCH 527/640] soc-camera: mt9t112: modify exiting conditions from standby mode This polling is needed if camera is in standby mode, but current exiting condition is inverted. Signed-off-by: Kuninori Morimoto Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/mt9t112.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/mt9t112.c b/drivers/media/video/mt9t112.c index fc4dd6045720..7438f8d775ba 100644 --- a/drivers/media/video/mt9t112.c +++ b/drivers/media/video/mt9t112.c @@ -514,7 +514,7 @@ static int mt9t112_init_pll(const struct i2c_client *client) /* poll to verify out of standby. 
Must Poll this bit */ for (i = 0; i < 100; i++) { mt9t112_reg_read(data, client, 0x0018); - if (0x4000 & data) + if (!(0x4000 & data)) break; mdelay(10); From 53f68607caba85db9a73846ccd289e4b7fa96295 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Thu, 11 Feb 2010 10:50:31 -0300 Subject: [PATCH 528/640] V4L/DVB: Video : pwc : Fix regression in pwc_set_shutter_speed caused by bad constant => sizeof conversion. Regression was caused by my commit 6b35ca0d3d586b8ecb8396821af21186e20afaf0 which determined message size using sizeof rather than hardcoded constants. Unfortunately pwc_set_shutter_speed reuses a 2 byte buffer for a one byte message too so the sizeof was bogus in this case. All other uses of sizeof checked and are ok. Acked-by: Laurent Pinchart Cc: stable@kernel.org Signed-off-by: Martin Fuzzey Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/pwc/pwc-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/pwc/pwc-ctrl.c b/drivers/media/video/pwc/pwc-ctrl.c index 50b415e07eda..f7f7e04cf485 100644 --- a/drivers/media/video/pwc/pwc-ctrl.c +++ b/drivers/media/video/pwc/pwc-ctrl.c @@ -753,7 +753,7 @@ int pwc_set_shutter_speed(struct pwc_device *pdev, int mode, int value) buf[0] = 0xff; /* fixed */ ret = send_control_msg(pdev, - SET_LUM_CTL, SHUTTER_MODE_FORMATTER, &buf, sizeof(buf)); + SET_LUM_CTL, SHUTTER_MODE_FORMATTER, &buf, 1); if (!mode && ret >= 0) { if (value < 0) From 2434466432464110b5307757e0285dd41f15512e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 19 Feb 2010 00:18:41 -0300 Subject: [PATCH 529/640] V4L/DVB: bttv: Move I2C IR initialization Move I2C IR initialization from just after I2C bus setup to right before non-I2C IR initialization. This avoids the case where an I2C IR device is blocking audio support (at least the PV951 suffers from this). It is also more logical to group IR support together, regardless of the connectivity. 
This fixes bug #15184: http://bugzilla.kernel.org/show_bug.cgi?id=15184 Signed-off-by: Jean Delvare CC: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/bt8xx/bttv-driver.c | 1 + drivers/media/video/bt8xx/bttv-i2c.c | 8 ++++++-- drivers/media/video/bt8xx/bttvp.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 3182a406bdd1..ae08b077fd04 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -4461,6 +4461,7 @@ static int __devinit bttv_probe(struct pci_dev *dev, request_modules(btv); } + init_bttv_i2c_ir(btv); bttv_input_init(btv); /* everything is fine */ diff --git a/drivers/media/video/bt8xx/bttv-i2c.c b/drivers/media/video/bt8xx/bttv-i2c.c index 63aa31a041e8..407fa61e4cda 100644 --- a/drivers/media/video/bt8xx/bttv-i2c.c +++ b/drivers/media/video/bt8xx/bttv-i2c.c @@ -388,7 +388,12 @@ int __devinit init_bttv_i2c(struct bttv *btv) if (0 == btv->i2c_rc && i2c_scan) do_i2c_scan(btv->c.v4l2_dev.name, &btv->i2c_client); - /* Instantiate the IR receiver device, if present */ + return btv->i2c_rc; +} + +/* Instantiate the I2C IR receiver device, if present */ +void __devinit init_bttv_i2c_ir(struct bttv *btv) +{ if (0 == btv->i2c_rc) { struct i2c_board_info info; /* The external IR receiver is at i2c address 0x34 (0x35 for @@ -408,7 +413,6 @@ int __devinit init_bttv_i2c(struct bttv *btv) strlcpy(info.type, "ir_video", I2C_NAME_SIZE); i2c_new_probed_device(&btv->c.i2c_adap, &info, addr_list); } - return btv->i2c_rc; } int __devexit fini_bttv_i2c(struct bttv *btv) diff --git a/drivers/media/video/bt8xx/bttvp.h b/drivers/media/video/bt8xx/bttvp.h index a1d0e9c9f286..6cccc2a17eee 100644 --- a/drivers/media/video/bt8xx/bttvp.h +++ b/drivers/media/video/bt8xx/bttvp.h @@ -279,6 +279,7 @@ extern unsigned int bttv_debug; extern unsigned int bttv_gpio; extern void bttv_gpio_tracking(struct bttv 
*btv, char *comment); extern int init_bttv_i2c(struct bttv *btv); +extern void init_bttv_i2c_ir(struct bttv *btv); extern int fini_bttv_i2c(struct bttv *btv); #define bttv_printk if (bttv_verbose) printk From 6f6ef82cc9de24153ba7d5cedab5970e276aefa1 Mon Sep 17 00:00:00 2001 From: Carlos Corbacho Date: Sat, 26 Dec 2009 19:24:31 +0000 Subject: [PATCH 530/640] acer-wmi: Respect current backlight level when loading Set the backlight to use the current brightness when loaded, rather than always resetting the backlight to maximum brightness. Fixes kernel bugzilla #14207 Signed-off-by: Carlos Corbacho Reported-by: Denis Mukhin Signed-off-by: Len Brown --- drivers/platform/x86/acer-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 07d14dfdf0b4..226b3e93498c 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -934,7 +934,7 @@ static int __devinit acer_backlight_init(struct device *dev) acer_backlight_device = bd; bd->props.power = FB_BLANK_UNBLANK; - bd->props.brightness = max_brightness; + bd->props.brightness = read_brightness(bd); bd->props.max_brightness = max_brightness; backlight_update_status(bd); return 0; From 455c0d71d46e86b0b7ff2c9dcfc19bc162302ee9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 18 Feb 2010 10:28:20 -0800 Subject: [PATCH 531/640] ACPI: Fix regression where _PPC is not read at boot even when ignore_ppc=0 Earlier, Ingo Molnar posted a patch to make it so that the kernel would avoid reading _PPC on his broken T60. Unfortunately, it seems that with Thomas Renninger's patch last July to eliminate _PPC evaluations when the processor driver loads, the kernel never actually reads _PPC at all! This is problematic if you happen to boot your non-T60 computer in a state where the BIOS _wants_ _PPC to be something other than zero. 
So, put the _PPC evaluation back into acpi_processor_get_performance_info if ignore_ppc isn't 1. Signed-off-by: Darrick J. Wong Signed-off-by: Len Brown --- drivers/acpi/processor_perflib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 2cabadcc4d8c..a959f6a07508 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -413,7 +413,11 @@ static int acpi_processor_get_performance_info(struct acpi_processor *pr) if (result) goto update_bios; - return 0; + /* We need to call _PPC once when cpufreq starts */ + if (ignore_ppc != 1) + result = acpi_processor_get_platform_limit(pr); + + return result; /* * Having _PPC but missing frequencies (_PSS, _PCT) is a very good hint that From ac278a9c505092dd82077a2446af8f9fc0d9c095 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2010 18:09:36 +0000 Subject: [PATCH 532/640] fix LOOKUP_FOLLOW on automount "symlinks" Make sure that automount "symlinks" are followed regardless of LOOKUP_FOLLOW; it should have no effect on them. Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d62fdc875f22..a4855af776a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -822,6 +822,17 @@ fail: return PTR_ERR(dentry); } +/* + * This is a temporary kludge to deal with "automount" symlinks; proper + * solution is to trigger them on follow_mount(), so that do_lookup() + * would DTRT. To be killed before 2.6.34-final. + */ +static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) +{ + return inode && unlikely(inode->i_op->follow_link) && + ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode)); +} + /* * Name resolution. 
* This is the basic name resolution function, turning a pathname into @@ -942,8 +953,7 @@ last_component: if (err) break; inode = next.dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op->follow_link) { + if (follow_on_final(inode, lookup_flags)) { err = do_follow_link(&next, nd); if (err) goto return_err; From 7fee4868be91e71a3ee8e57289ebf5e10a12297e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Jan 2010 01:03:28 -0500 Subject: [PATCH 533/640] Switch proc/self to nd_set_link() Signed-off-by: Al Viro --- fs/proc/base.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index e42bbd843ed1..58324c299165 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2369,16 +2369,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return ERR_PTR(-ENOENT); - sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); - return ERR_PTR(vfs_follow_link(nd,tmp)); + char *name = ERR_PTR(-ENOENT); + if (tgid) { + name = __getname(); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d", tgid); + } + nd_set_link(nd, name); + return NULL; +} + +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + __putname(s); } static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, + .put_link = proc_self_put_link, }; /* From 4e70af56319e56423d6eb1ce25fc321cdf8cd41d Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 15 Feb 2010 11:16:11 +0000 Subject: [PATCH 534/640] fs: inode - remove 8 bytes of padding on 64bits allowing 1 more objects/slab under slub This removes 8 bytes of padding from struct inode on 64bit builds, and so allows 1 more 
object/slab in the inode_cache when using slub. Signed-off-by: Richard Kennedy ---- patch against 2.6.33-rc8 compiled & tested on x86_64 AMDX2 I've been running this patch for over a week with no obvious problems regards Richard Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index b1bcb275b596..ebb1cd5bc241 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -729,6 +729,7 @@ struct inode { uid_t i_uid; gid_t i_gid; dev_t i_rdev; + unsigned int i_blkbits; u64 i_version; loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED @@ -738,7 +739,6 @@ struct inode { struct timespec i_mtime; struct timespec i_ctime; blkcnt_t i_blocks; - unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ From 5e2f75b8993a0d83d469388b50716dd5551f2eb4 Mon Sep 17 00:00:00 2001 From: Dan Halperin Date: Thu, 18 Feb 2010 22:01:39 -0800 Subject: [PATCH 535/640] iwlwifi: set HT flags after channel in rxon The HT extension channel settings require priv->staging_rxon.channel to be accurate. However, iwl_set_rxon_ht was being called before iwl_set_rxon_channel and thus HT40 could be broken unless another call to iwl_mac_config came in. This problem was recently introduced by "iwlwifi: Fix to set correct ht configuration" The particular setting in which I noticed this was monitor mode: iwconfig wlan0 mode monitor ifconfig wlan0 up ./iw wlan0 set channel 64 HT40- #./iw wlan0 set channel 64 HT40- tcpdump -i wlan0 -y IEEE802_11_RADIO would only catch HT40 packets if I issued the IW command twice. From visual inspection, iwl_set_rxon_channel does not depend on iwl_set_rxon_ht, so simply swapping them should be safe and fixes this problem. Signed-off-by: Daniel Halperin Acked-by: Wey-Yi Guy Signed-off-by: Reinette Chatre CC: stable@kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index d10bea64fce3..f36f804804fc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2744,8 +2744,8 @@ int iwl_mac_config(struct ieee80211_hw *hw, u32 changed) if ((le16_to_cpu(priv->staging_rxon.channel) != ch)) priv->staging_rxon.flags = 0; - iwl_set_rxon_ht(priv, ht_conf); iwl_set_rxon_channel(priv, conf->channel); + iwl_set_rxon_ht(priv, ht_conf); iwl_set_flags_for_band(priv, conf->channel->band); spin_unlock_irqrestore(&priv->lock, flags); From b3dc1a212e5167984616445990c76056034f8eeb Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Thu, 11 Feb 2010 18:01:50 +0100 Subject: [PATCH 536/640] [SCSI] megaraid_sas: fix for 32bit apps It looks like this patch - commit 7b2519afa1abd1b9f63aa1e90879307842422dae Author: Yang, Bo Date: Tue Oct 6 14:52:20 2009 -0600 [SCSI] megaraid_sas: fix 64 bit sense pointer truncation has caused a problem for 32bit programs with 64bit os - http://bugzilla.kernel.org/show_bug.cgi?id=15001 fix by converting the user space 32bit pointer to a 64 bit one when needed. 
[jejb: fix up some 64 bit warnings] Signed-off-by: Tomas Henzl Cc: Bo Yang Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 708ea3157b60..d9b8ca5116bc 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -3781,6 +3781,7 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) compat_alloc_user_space(sizeof(struct megasas_iocpacket)); int i; int error = 0; + compat_uptr_t ptr; if (clear_user(ioc, sizeof(*ioc))) return -EFAULT; @@ -3793,9 +3794,22 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg) copy_in_user(&ioc->sge_count, &cioc->sge_count, sizeof(u32))) return -EFAULT; - for (i = 0; i < MAX_IOCTL_SGE; i++) { - compat_uptr_t ptr; + /* + * The sense_ptr is used in megasas_mgmt_fw_ioctl only when + * sense_len is not null, so prepare the 64bit value under + * the same condition. + */ + if (ioc->sense_len) { + void __user **sense_ioc_ptr = + (void __user **)(ioc->frame.raw + ioc->sense_off); + compat_uptr_t *sense_cioc_ptr = + (compat_uptr_t *)(cioc->frame.raw + cioc->sense_off); + if (get_user(ptr, sense_cioc_ptr) || + put_user(compat_ptr(ptr), sense_ioc_ptr)) + return -EFAULT; + } + for (i = 0; i < MAX_IOCTL_SGE; i++) { if (get_user(ptr, &cioc->sgl[i].iov_base) || put_user(compat_ptr(ptr), &ioc->sgl[i].iov_base) || copy_in_user(&ioc->sgl[i].iov_len, From 2cc9116c2b37c525965d76a3e6def38913259427 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 16 Feb 2010 16:18:37 -0500 Subject: [PATCH 537/640] vgaarb: fix "target=default" passing Commit 77c1ff3982c6b36961725dd19e872a1c07df7f3b fixed the userspace pointer dereference, but introduced another bug pointed out by Eugene Teo in RH bug #564264. 
Instead of comparing the point we were at in the string, we instead compared the beginning of the string to "default". Signed-off-by: Kyle McMartin Reported-by: Eugene Teo Signed-off-by: Dave Airlie --- drivers/gpu/vga/vgaarb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 24b56dc54597..2f6cf69ecb39 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -961,7 +961,7 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf, remaining -= 7; pr_devel("client 0x%p called 'target'\n", priv); /* if target is default */ - if (!strncmp(kbuf, "default", 7)) + if (!strncmp(curr_pos, "default", 7)) pdev = pci_dev_get(vga_default_device()); else { if (!vga_pci_str_to_vars(curr_pos, remaining, From c86a90383638fa830c32cf086a1520be72167086 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 18 Feb 2010 14:14:58 -0500 Subject: [PATCH 538/640] drm/radeon/kms/rs600: add connector quirk rs600 board lists DVI port as HDMI. 
Fixes fdo bug 26605 Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_atombios.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 2dcda6115874..4d8831548a5f 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -206,6 +206,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, *connector_type = DRM_MODE_CONNECTOR_DVID; } + /* Asrock RS600 board lists the DVI port as HDMI */ + if ((dev->pdev->device == 0x7941) && + (dev->pdev->subsystem_vendor == 0x1849) && + (dev->pdev->subsystem_device == 0x7941)) { + if ((*connector_type == DRM_MODE_CONNECTOR_HDMIA) && + (supported_device == ATOM_DEVICE_DFP3_SUPPORT)) + *connector_type = DRM_MODE_CONNECTOR_DVID; + } + /* a-bit f-i90hd - ciaranm on #radeonhd - this board has no DVI */ if ((dev->pdev->device == 0x7941) && (dev->pdev->subsystem_vendor == 0x147b) && From d3932d6c475f8307ac66b4ce21563285ec05f6ea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Feb 2010 02:13:56 -0500 Subject: [PATCH 539/640] drm/radeon/kms: fix shared ddc detection Just compare the i2c id since the i2c structs may be slightly different. Fixes fdo bug 26616. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 238188540017..65f81942f399 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -780,7 +780,7 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect * connected and the DVI port disconnected. If the edid doesn't * say HDMI, vice versa.
*/ - if (radeon_connector->shared_ddc && connector_status_connected) { + if (radeon_connector->shared_ddc && (ret == connector_status_connected)) { struct drm_device *dev = connector->dev; struct drm_connector *list_connector; struct radeon_connector *list_radeon_connector; @@ -1060,8 +1060,7 @@ radeon_add_atom_connector(struct drm_device *dev, return; } if (radeon_connector->ddc_bus && i2c_bus->valid) { - if (memcmp(&radeon_connector->ddc_bus->rec, i2c_bus, - sizeof(struct radeon_i2c_bus_rec)) == 0) { + if (radeon_connector->ddc_bus->rec.i2c_id == i2c_bus->i2c_id) { radeon_connector->shared_ddc = true; shared_ddc = true; } From 6a660f06e8120977b25d30ace354c8f9dc3aff2a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Feb 2010 16:07:02 -0500 Subject: [PATCH 540/640] drm/radeon/rv740: fix backend setup This patch fixes occlusion queries and rendering errors on rv740 boards. Hardcoding the backend map is not an optimal solution, but a better fix is being worked on. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 9 ++++++--- drivers/gpu/drm/radeon/rv770.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 6d5a711c2e91..75bcf35a0931 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -1428,9 +1428,12 @@ static void r700_gfx_init(struct drm_device *dev, gb_tiling_config |= R600_BANK_SWAPS(1); - backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, - dev_priv->r600_max_backends, - (0xff << dev_priv->r600_max_backends) & 0xff); + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740) + backend_map = 0x28; + else + backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, + dev_priv->r600_max_backends, + (0xff << dev_priv->r600_max_backends) & 0xff); gb_tiling_config |= R600_BACKEND_MAP(backend_map); cc_gc_shader_pipe_config = diff --git 
a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 5943d561fd1e..03021674d097 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -549,9 +549,12 @@ static void rv770_gpu_init(struct radeon_device *rdev) gb_tiling_config |= BANK_SWAPS(1); - backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, - rdev->config.rv770.max_backends, - (0xff << rdev->config.rv770.max_backends) & 0xff); + if (rdev->family == CHIP_RV740) + backend_map = 0x28; + else + backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes, + rdev->config.rv770.max_backends, + (0xff << rdev->config.rv770.max_backends) & 0xff); gb_tiling_config |= BACKEND_MAP(backend_map); cc_gc_shader_pipe_config = From f0e2f38befa787f0267419082b33e8ac72269d77 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 20 Feb 2010 07:30:15 +1000 Subject: [PATCH 541/640] drm/ttm: fix caching problem on non-PAT systems. http://bugzilla.kernel.org/show_bug.cgi?id=15328 This fixes a serious regression on AGP/non-PAT systems, where pages were ending up in the wrong state and slowing down the whole system. [airlied: taken this from the bug as the other option is to revert the change which caused it]. Tested-by: John W. Linville (in bug). 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_tt.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index e2123af7775a..3d47a2c12322 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -196,14 +196,15 @@ EXPORT_SYMBOL(ttm_tt_populate); #ifdef CONFIG_X86 static inline int ttm_tt_set_page_caching(struct page *p, - enum ttm_caching_state c_state) + enum ttm_caching_state c_old, + enum ttm_caching_state c_new) { int ret = 0; if (PageHighMem(p)) return 0; - if (get_page_memtype(p) != -1) { + if (c_old != tt_cached) { /* p isn't in the default caching state, set it to * writeback first to free its current memtype. */ @@ -212,16 +213,17 @@ static inline int ttm_tt_set_page_caching(struct page *p, return ret; } - if (c_state == tt_wc) + if (c_new == tt_wc) ret = set_memory_wc((unsigned long) page_address(p), 1); - else if (c_state == tt_uncached) + else if (c_new == tt_uncached) ret = set_pages_uc(p, 1); return ret; } #else /* CONFIG_X86 */ static inline int ttm_tt_set_page_caching(struct page *p, - enum ttm_caching_state c_state) + enum ttm_caching_state c_old, + enum ttm_caching_state c_new) { return 0; } @@ -254,7 +256,9 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm, for (i = 0; i < ttm->num_pages; ++i) { cur_page = ttm->pages[i]; if (likely(cur_page != NULL)) { - ret = ttm_tt_set_page_caching(cur_page, c_state); + ret = ttm_tt_set_page_caching(cur_page, + ttm->caching_state, + c_state); if (unlikely(ret != 0)) goto out_err; } @@ -268,7 +272,7 @@ out_err: for (j = 0; j < i; ++j) { cur_page = ttm->pages[j]; if (likely(cur_page != NULL)) { - (void)ttm_tt_set_page_caching(cur_page, + (void)ttm_tt_set_page_caching(cur_page, c_state, ttm->caching_state); } } From 7d404c7b5f4c004712bc15ed6e6edd6779842126 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 18 Feb 2010 13:13:29 +0000 Subject: [PATCH 542/640] drm/radeon/kms: free fence 
IB if it wasn't emited at IB free time If at IB free time the fence wasn't emitted, that means the IB wasn't scheduled because an error occurred somewhere; thus we can free the fence and mark the IB as free. Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 694799f6fac1..6579eb4c1f28 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -100,6 +100,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib) if (tmp == NULL) { return; } + if (!tmp->fence->emited) + radeon_fence_unref(&tmp->fence); mutex_lock(&rdev->ib_pool.mutex); tmp->free = true; mutex_unlock(&rdev->ib_pool.mutex); From 635f1a31292087a2e99568bf4451c10ee287adaa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 20 Feb 2010 09:17:18 +1000 Subject: [PATCH 543/640] drm/radeon: bump the UMS driver version number to indicate rv740 fix This lets UMS userspace know the rv740 fix is in. For KMS we can consider the kernel release to be the v2.0.0 release so we don't need the bump there. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index e13785282a82..c57ad606504d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -106,9 +106,10 @@ * 1.29- R500 3D cmd buffer support * 1.30- Add support for occlusion queries * 1.31- Add support for num Z pipes from GET_PARAM + * 1.32- fixes for rv740 setup */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 31 +#define DRIVER_MINOR 32 #define DRIVER_PATCHLEVEL 0 enum radeon_cp_microcode_version { From 1f474646fdc36b457606bbcd6a3592e6cbd31ac4 Mon Sep 17 00:00:00 2001 From: "David S.
Miller" Date: Fri, 19 Feb 2010 15:19:52 -0800 Subject: [PATCH 544/640] sparc64: Fix sun4u execute bit check in TSB I-TLB load. Thanks to testcase and report from Brad Spengler: -------------------- #include <stdio.h> typedef int (* _wee)(void); int main(void) { char buf[8] = { '\x81', '\xc7', '\xe0', '\x08', '\x81', '\xe8', '\x00', '\x00' }; _wee wee; printf("%p\n", &buf); wee = (_wee)&buf; wee(); return 0; } -------------------- TSB I-tlb load code tries to use andcc to check the _PAGE_EXEC_4U bit, but that's bit 12 so it gets sign extended all the way up to bit 63 and the test nearly always passes as a result. Use sethi to fix the bug. Signed-off-by: David S. Miller --- arch/sparc/kernel/tsb.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 8c91d9b29a2f..db15d123f054 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -191,10 +191,12 @@ tsb_dtlb_load: tsb_itlb_load: /* Executable bit must be set. */ -661: andcc %g5, _PAGE_EXEC_4U, %g0 - .section .sun4v_1insn_patch, "ax" +661: sethi %hi(_PAGE_EXEC_4U), %g4 + andcc %g5, %g4, %g0 + .section .sun4v_2insn_patch, "ax" .word 661b andcc %g5, _PAGE_EXEC_4V, %g0 + nop .previous be,pn %xcc, tsb_do_fault From 88af182e389097997c5e2a0b42285b3522796759 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:22:59 +0000 Subject: [PATCH 545/640] net: Fix sysctl restarts... Yuck. It turns out that when we restart sysctls we were restarting with the values already changed. Which unfortunately meant that the second time through we thought there was no change and skipped all kinds of work, despite the fact that there was indeed a change. I have fixed this the simplest way possible by restoring the changed values when we restart the sysctl write. One of my coworkers spotted this bug when after disabling forwarding on an interface pings were still forwarded. Signed-off-by: Eric W. Biederman Signed-off-by: David S.
Miller --- net/ipv4/devinet.c | 7 ++++++- net/ipv6/addrconf.c | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 040c4f05b653..26dec2be9615 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *valp = val; + *ppos = pos; return restart_syscall(); + } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else if (*valp) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index de7a194a64ab..143791da062c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -502,8 +502,11 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->forwarding) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; @@ -4028,12 +4031,15 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } @@ -4075,8 +4081,11 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) if (p == &net->ipv6.devconf_dflt->disable_ipv6) return 0; - if (!rtnl_trylock()) + if (!rtnl_trylock()) { + /* Restore the original values before restarting */ + *p = old; return restart_syscall(); + } 
if (p == &net->ipv6.devconf_all->disable_ipv6) { __s32 newf = net->ipv6.devconf_all->disable_ipv6; @@ -4095,12 +4104,15 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, { int *valp = ctl->data; int val = *valp; + loff_t pos = *ppos; int ret; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); + if (ret) + *ppos = pos; return ret; } From b8afe6416101549e877f8470f2a160df69676166 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 19 Feb 2010 13:23:47 +0000 Subject: [PATCH 546/640] net-sysfs: Use rtnl_trylock in wireless sysfs methods. The wireless sysfs methods like the rest of the networking sysfs methods are removed with the rtnl_lock held and block until the existing methods stop executing. So use rtnl_trylock and restart_syscall so that the code continues to work. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fbc1c7472c5e..099c753c4213 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -410,7 +410,8 @@ static ssize_t wireless_show(struct device *d, char *buf, const struct iw_statistics *iw; ssize_t ret = -EINVAL; - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (dev_isalive(dev)) { iw = get_wireless_stats(dev); if (iw) From e0bf54c93a15c365a37cfc4fe0137f5bc012d1b9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 19 Feb 2010 13:29:27 +0000 Subject: [PATCH 547/640] sfc: Fix sign of efx_mcdi_poll_reboot() error in efx_mcdi_poll() efx_mcdi_poll() uses positive error numbers, matching the MCDI protocol. It must negate the result of efx_mcdi_poll_reboot() which returns the usual negative error numbers. Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- drivers/net/sfc/mcdi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c index 9f035b9f0350..f66b3da6ddff 100644 --- a/drivers/net/sfc/mcdi.c +++ b/drivers/net/sfc/mcdi.c @@ -127,7 +127,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) efx_dword_t reg; /* Check for a reboot atomically with respect to efx_mcdi_copyout() */ - rc = efx_mcdi_poll_reboot(efx); + rc = -efx_mcdi_poll_reboot(efx); if (rc) goto out; From 242cc0547f3bcecc0b02ca6f3e9512760185727e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 19 Feb 2010 13:34:03 +0000 Subject: [PATCH 548/640] sfc: SFE4002/SFN4112F: Widen temperature and voltage tolerances The temperature and voltage limits currently set on these boards are too conservative and will cause the driver to stop the net device erroneously in some systems. Based on a review of the chip datasheets and advice from the designer of these boards: - Raise the maximum board temperatures to the specified maximum ambient temperatures for their PHYs plus the expected temperature bias of the board - Raise the maximum controller temperature to 90 degrees - Lower the minimum temperatures to 0 degrees - Widen the voltage tolerances to at least +/- 10% Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/sfc/falcon_boards.c | 45 ++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/sfc/falcon_boards.c b/drivers/net/sfc/falcon_boards.c index bf0b96af5334..5712fddd72f2 100644 --- a/drivers/net/sfc/falcon_boards.c +++ b/drivers/net/sfc/falcon_boards.c @@ -29,6 +29,15 @@ #define FALCON_BOARD_SFN4111T 0x51 #define FALCON_BOARD_SFN4112F 0x52 +/* Board temperature is about 15°C above ambient when air flow is + * limited. 
*/ +#define FALCON_BOARD_TEMP_BIAS 15 + +/* SFC4000 datasheet says: 'The maximum permitted junction temperature + * is 125°C; the thermal design of the environment for the SFC4000 + * should aim to keep this well below 100°C.' */ +#define FALCON_JUNC_TEMP_MAX 90 + /***************************************************************************** * Support for LM87 sensor chip used on several boards */ @@ -548,16 +557,16 @@ fail_hwmon: static u8 sfe4002_lm87_channel = 0x03; /* use AIN not FAN inputs */ static const u8 sfe4002_lm87_regs[] = { - LM87_IN_LIMITS(0, 0x83, 0x91), /* 2.5V: 1.8V +/- 5% */ - LM87_IN_LIMITS(1, 0x51, 0x5a), /* Vccp1: 1.2V +/- 5% */ - LM87_IN_LIMITS(2, 0xb6, 0xca), /* 3.3V: 3.3V +/- 5% */ - LM87_IN_LIMITS(3, 0xb0, 0xc9), /* 5V: 4.6-5.2V */ - LM87_IN_LIMITS(4, 0xb0, 0xe0), /* 12V: 11-14V */ - LM87_IN_LIMITS(5, 0x44, 0x4b), /* Vccp2: 1.0V +/- 5% */ - LM87_AIN_LIMITS(0, 0xa0, 0xb2), /* AIN1: 1.66V +/- 5% */ - LM87_AIN_LIMITS(1, 0x91, 0xa1), /* AIN2: 1.5V +/- 5% */ - LM87_TEMP_INT_LIMITS(10, 60), /* board */ - LM87_TEMP_EXT1_LIMITS(10, 70), /* Falcon */ + LM87_IN_LIMITS(0, 0x7c, 0x99), /* 2.5V: 1.8V +/- 10% */ + LM87_IN_LIMITS(1, 0x4c, 0x5e), /* Vccp1: 1.2V +/- 10% */ + LM87_IN_LIMITS(2, 0xac, 0xd4), /* 3.3V: 3.3V +/- 10% */ + LM87_IN_LIMITS(3, 0xac, 0xd4), /* 5V: 5.0V +/- 10% */ + LM87_IN_LIMITS(4, 0xac, 0xe0), /* 12V: 10.8-14V */ + LM87_IN_LIMITS(5, 0x3f, 0x4f), /* Vccp2: 1.0V +/- 10% */ + LM87_AIN_LIMITS(0, 0x98, 0xbb), /* AIN1: 1.66V +/- 10% */ + LM87_AIN_LIMITS(1, 0x8a, 0xa9), /* AIN2: 1.5V +/- 10% */ + LM87_TEMP_INT_LIMITS(0, 80 + FALCON_BOARD_TEMP_BIAS), + LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX), 0 }; @@ -619,14 +628,14 @@ static int sfe4002_init(struct efx_nic *efx) static u8 sfn4112f_lm87_channel = 0x03; /* use AIN not FAN inputs */ static const u8 sfn4112f_lm87_regs[] = { - LM87_IN_LIMITS(0, 0x83, 0x91), /* 2.5V: 1.8V +/- 5% */ - LM87_IN_LIMITS(1, 0x51, 0x5a), /* Vccp1: 1.2V +/- 5% */ - LM87_IN_LIMITS(2, 0xb6, 0xca), /* 3.3V: 3.3V +/- 
5% */ - LM87_IN_LIMITS(4, 0xb0, 0xe0), /* 12V: 11-14V */ - LM87_IN_LIMITS(5, 0x44, 0x4b), /* Vccp2: 1.0V +/- 5% */ - LM87_AIN_LIMITS(1, 0x91, 0xa1), /* AIN2: 1.5V +/- 5% */ - LM87_TEMP_INT_LIMITS(10, 60), /* board */ - LM87_TEMP_EXT1_LIMITS(10, 70), /* Falcon */ + LM87_IN_LIMITS(0, 0x7c, 0x99), /* 2.5V: 1.8V +/- 10% */ + LM87_IN_LIMITS(1, 0x4c, 0x5e), /* Vccp1: 1.2V +/- 10% */ + LM87_IN_LIMITS(2, 0xac, 0xd4), /* 3.3V: 3.3V +/- 10% */ + LM87_IN_LIMITS(4, 0xac, 0xe0), /* 12V: 10.8-14V */ + LM87_IN_LIMITS(5, 0x3f, 0x4f), /* Vccp2: 1.0V +/- 10% */ + LM87_AIN_LIMITS(1, 0x8a, 0xa9), /* AIN2: 1.5V +/- 10% */ + LM87_TEMP_INT_LIMITS(0, 60 + FALCON_BOARD_TEMP_BIAS), + LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX), 0 }; From aeaa5ccd6421fbf9e7ded0ac67b12ea2b9fcf51e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Mon, 15 Feb 2010 18:07:39 -0500 Subject: [PATCH 549/640] vfs: don't call ima_file_check() unconditionally in nfsd_open() commit 1e41568d7378d1ba8c64ba137b9ddd00b59f893a ("Take ima_path_check() in nfsd past dentry_open() in nfsd_open()") moved this code back to its original location but missed the "else". 
Signed-off-by: Chuck Ebbert Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 97d79eff6b7f..8715d194561a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,7 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_file_check(*filp, access); + else + host_err = ima_file_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: From d9c4f846997c6d37e4f56907d93f1be022c17c6b Mon Sep 17 00:00:00 2001 From: Samu Onkalo Date: Fri, 19 Feb 2010 23:17:58 -0800 Subject: [PATCH 550/640] Input: polldev can cause crash in case when polling disabled When polled input device is opened and closed and there are no other users of polled device, the workqueue is created and destroyed in every open / close operation. It is probable that at some point dynamic allocation of internal parts of the workqueue cause changes to the workqueue. When a work is queued to the workqueue the work struct contains pointers to the workqueue data. If the workqueue has been changed and the work has never been queued to the new workqueue, work-struct contains pointers to the non-existing workqueue. This will cause crash at the work cancellation during device close since cancellation of a work assumes that the workqueue exists. To prevent that, work struct is cleaned up at device close. This keeps work struct clean for the next use. 
Signed-off-by: Samu Onkalo Signed-off-by: Dmitry Torokhov --- drivers/input/input-polldev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c index aa6713b4a988..291d9393d359 100644 --- a/drivers/input/input-polldev.c +++ b/drivers/input/input-polldev.c @@ -100,6 +100,12 @@ static void input_close_polled_device(struct input_dev *input) struct input_polled_dev *dev = input_get_drvdata(input); cancel_delayed_work_sync(&dev->work); + /* + * Clean up work struct to remove references to the workqueue. + * It may be destroyed by the next call. This causes problems + * at next device open-close in case of poll_interval == 0. + */ + INIT_DELAYED_WORK(&dev->work, dev->work.work.func); input_polldev_stop_workqueue(); if (dev->close) From 1c8e170aaa7ba62c0160e96a52e25ad004419109 Mon Sep 17 00:00:00 2001 From: Abdoulaye Walsimou Gaye Date: Fri, 19 Feb 2010 12:47:14 +0100 Subject: [PATCH 551/640] ARM: 5950/1: ARM: Fix build error for arm1026ej-s processor This patch fix the below build error for arm1026ej-s processor (IntegratorCP/arm1026ej-s board). 
CC init/main.o In file included from include/linux/highmem.h:8, from include/linux/pagemap.h:10, from include/linux/mempolicy.h:62, from init/main.c:52: arch/arm/include/asm/cacheflush.h:134:2: error: #error Unknown cache maintainence model make[1]: *** [init/main.o] Erreur 1 make: *** [init] Erreur 2 Signed-off-by: Abdoulaye Walsimou Gaye Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index c77d2fa1f6e5..8113bb5fb66e 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -42,7 +42,8 @@ #endif #if defined(CONFIG_CPU_ARM920T) || defined(CONFIG_CPU_ARM922T) || \ - defined(CONFIG_CPU_ARM925T) || defined(CONFIG_CPU_ARM1020) + defined(CONFIG_CPU_ARM925T) || defined(CONFIG_CPU_ARM1020) || \ + defined(CONFIG_CPU_ARM1026) # define MULTI_CACHE 1 #endif From 4e10ae11317b238609fc3ec9d50a5dee9473e045 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 20 Feb 2010 09:41:30 +0100 Subject: [PATCH 552/640] ARM: 5951/1: ARM: fix documentation of the PrimeCell bus This fixes the filepath encoded in and adds some documentation as to what this bus really means. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/bus.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index ab94335b4bb9..6816be6c3f77 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -1,5 +1,9 @@ /* - * linux/include/asm-arm/hardware/amba.h + * linux/include/amba/bus.h + * + * This device type deals with ARM PrimeCells and anything else that + * presents a proper CID (0xB105F00D) at the end of the I/O register + * region or that is derived from a PrimeCell. * * Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved. 
* From 0fa11802e0dcbd4e211a9310500bf52d701b9c1b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 20 Feb 2010 14:16:16 +0000 Subject: [PATCH 553/640] ARM: Update mach-types Signed-off-by: Russell King --- arch/arm/tools/mach-types | 46 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 5a79fc6ee818..31c2f4c30a95 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Thu Jan 28 22:15:54 2010 +# Last update: Sat Feb 20 14:16:15 2010 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -2257,7 +2257,7 @@ oratisalog MACH_ORATISALOG ORATISALOG 2268 oratismadi MACH_ORATISMADI ORATISMADI 2269 oratisot16 MACH_ORATISOT16 ORATISOT16 2270 oratisdesk MACH_ORATISDESK ORATISDESK 2271 -v2_ca9 MACH_V2P_CA9 V2P_CA9 2272 +vexpress MACH_VEXPRESS VEXPRESS 2272 sintexo MACH_SINTEXO SINTEXO 2273 cm3389 MACH_CM3389 CM3389 2274 omap3_cio MACH_OMAP3_CIO OMAP3_CIO 2275 @@ -2636,3 +2636,45 @@ hw90240 MACH_HW90240 HW90240 2648 dm365_leopard MACH_DM365_LEOPARD DM365_LEOPARD 2649 mityomapl138 MACH_MITYOMAPL138 MITYOMAPL138 2650 scat110 MACH_SCAT110 SCAT110 2651 +acer_a1 MACH_ACER_A1 ACER_A1 2652 +cmcontrol MACH_CMCONTROL CMCONTROL 2653 +pelco_lamar MACH_PELCO_LAMAR PELCO_LAMAR 2654 +rfp43 MACH_RFP43 RFP43 2655 +sk86r0301 MACH_SK86R0301 SK86R0301 2656 +ctpxa MACH_CTPXA CTPXA 2657 +epb_arm9_a MACH_EPB_ARM9_A EPB_ARM9_A 2658 +guruplug MACH_GURUPLUG GURUPLUG 2659 +spear310 MACH_SPEAR310 SPEAR310 2660 +spear320 MACH_SPEAR320 SPEAR320 2661 +robotx MACH_ROBOTX ROBOTX 2662 +lsxhl MACH_LSXHL LSXHL 2663 +smartlite MACH_SMARTLITE SMARTLITE 2664 +cws2 MACH_CWS2 CWS2 2665 +m619 MACH_M619 M619 2666 +smartview MACH_SMARTVIEW SMARTVIEW 2667 +lsa_salsa MACH_LSA_SALSA LSA_SALSA 2668 +kizbox MACH_KIZBOX KIZBOX 2669 +htccharmer MACH_HTCCHARMER HTCCHARMER 2670 +guf_neso_lt 
MACH_GUF_NESO_LT GUF_NESO_LT 2671 +pm9g45 MACH_PM9G45 PM9G45 2672 +htcpanther MACH_HTCPANTHER HTCPANTHER 2673 +htcpanther_cdma MACH_HTCPANTHER_CDMA HTCPANTHER_CDMA 2674 +reb01 MACH_REB01 REB01 2675 +aquila MACH_AQUILA AQUILA 2676 +spark_sls_hw2 MACH_SPARK_SLS_HW2 SPARK_SLS_HW2 2677 +sheeva_esata MACH_ESATA_SHEEVAPLUG ESATA_SHEEVAPLUG 2678 +surf7x30 MACH_SURF7X30 SURF7X30 2679 +micro2440 MACH_MICRO2440 MICRO2440 2680 +am2440 MACH_AM2440 AM2440 2681 +tq2440 MACH_TQ2440 TQ2440 2682 +lpc2478oem MACH_LPC2478OEM LPC2478OEM 2683 +ak880x MACH_AK880X AK880X 2684 +cobra3530 MACH_COBRA3530 COBRA3530 2685 +pmppb MACH_PMPPB PMPPB 2686 +u6715 MACH_U6715 U6715 2687 +axar1500_sender MACH_AXAR1500_SENDER AXAR1500_SENDER 2688 +g30_dvb MACH_G30_DVB G30_DVB 2689 +vc088x MACH_VC088X VC088X 2690 +mioa702 MACH_MIOA702 MIOA702 2691 +hpmin MACH_HPMIN HPMIN 2692 +ak880xak MACH_AK880XAK AK880XAK 2693 From 8f9941aeccc318f243ab3fa55aaa17f4c1cb33f9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Feb 2010 18:14:21 +0000 Subject: [PATCH 554/640] CacheFiles: Fix a race in cachefiles_delete_object() vs rename cachefiles_delete_object() can race with rename. It gets the parent directory of the object it's asked to delete, then locks it - but rename may have changed the object's parent between the get and the completion of the lock. However, if such a circumstance is detected, we abandon our attempt to delete the object - since it's no longer in the index key path, it won't be seen again by lookups of that key. The assumption is that cachefilesd may have culled it by renaming it to the graveyard for later destruction. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/cachefiles/namei.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 14ac4806e291..eeb4986ea7db 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, dir = dget_parent(object->dentry); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - ret = cachefiles_bury_object(cache, dir, object->dentry); + + /* we need to check that our parent is _still_ our parent - it may have + * been renamed */ + if (dir == object->dentry->d_parent) { + ret = cachefiles_bury_object(cache, dir, object->dentry); + } else { + /* it got moved, presumably by cachefilesd culling it, so it's + * no longer in the key path and we can ignore it */ + mutex_unlock(&dir->d_inode->i_mutex); + ret = 0; + } dput(dir); _leave(" = %d", ret); From d944d549aa86e08cba080396513234cf048fee1f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 20 Feb 2010 16:13:29 +0000 Subject: [PATCH 555/640] ARM: allow alignment fault mode to be configured at kernel boot Some glibc versions intentionally create lots of alignment faults in their gconv code, which if not fixed up, results in segfaults during boot. This can prevent systems booting properly. There is no clear hard-configurable default for this; the desired default depends on the nature of the userspace which is going to be booted. So, provide a way for the alignment fault handler to be configured via the kernel command line. 
Signed-off-by: Russell King --- Documentation/kernel-parameters.txt | 5 +++++ arch/arm/mm/alignment.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 826b6e148316..e7848a0d99eb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -315,6 +315,11 @@ and is between 256 and 4096 characters. It is defined in the file aic79xx= [HW,SCSI] See Documentation/scsi/aic79xx.txt. + alignment= [KNL,ARM] + Allow the default userspace alignment fault handler + behaviour to be specified. Bit 0 enables warnings, + bit 1 enables fixups, and bit 2 sends a segfault. + amd_iommu= [HW,X86-84] Pass parameters to the AMD IOMMU driver in the system. Possible values are: diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b270d6228fe2..62820eda84d9 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -11,6 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -77,6 +78,8 @@ static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +core_param(alignment, ai_usermode, int, 0600); + #define UM_WARN (1 << 0) #define UM_FIXUP (1 << 1) #define UM_SIGNAL (1 << 2) From 10fe12ef631a7e85022ed26304a37f033a6a95b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Feb 2010 19:53:13 -0200 Subject: [PATCH 556/640] perf symbols: Fix up map end too on modular kernels with no modules installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 2161db9 we stopped failing when not finding modules when asked too, but then the kernel maps (just one, for vmlinux) wasn't having its ->end field correctly set up, so symbols were not being found for the vmlinux map because its range was 0-0. 
Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1266702793-29434-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.c | 6 ++++++ tools/perf/util/symbol.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index c3831f633dec..9eb7005bc6d6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -379,6 +379,12 @@ int event__process_mmap(event_t *self, struct perf_session *session) session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start; session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (session->vmlinux_maps[MAP__FUNCTION]->end == 0) + session->vmlinux_maps[MAP__FUNCTION]->end = ~0UL; perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name, self->mmap.pgoff); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6882e9fec2d6..ee9c37efdd36 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1937,7 +1937,7 @@ int map_groups__create_kernel_maps(struct map_groups *self, return -1; if (symbol_conf.use_modules && map_groups__create_modules(self) < 0) - return 0; + pr_debug("Problems creating module maps, continuing anyway...\n"); /* * Now that we have all the maps created, just set the ->end of them: */ From faa5c5c36ec50bf43e39c7798ce9701e6b002db3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Feb 2010 23:02:07 -0200 Subject: [PATCH 557/640] perf tools: Don't use parent comm if not set at fork time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the parent comm then is worthless, confusing users about the thread where the sample really happened, leading to think that the 
sample happened in the parent, not where it really happened, in the children of a thread for which a PERF_RECORD_COMM event was not received. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1266627727-19715-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 18 ++++++++++++------ tools/perf/util/thread.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 634b7f7140d5..9e8995eaf2b6 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -36,7 +36,10 @@ int thread__set_comm(struct thread *self, const char *comm) if (self->comm) free(self->comm); self->comm = strdup(comm); - return self->comm ? 0 : -ENOMEM; + if (self->comm == NULL) + return -ENOMEM; + self->comm_set = true; + return 0; } int thread__comm_len(struct thread *self) @@ -255,11 +258,14 @@ int thread__fork(struct thread *self, struct thread *parent) { int i; - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; + if (parent->comm_set) { + if (self->comm) + free(self->comm); + self->comm = strdup(parent->comm); + if (!self->comm) + return -ENOMEM; + self->comm_set = true; + } for (i = 0; i < MAP__NR_TYPES; ++i) if (map_groups__clone(&self->mg, &parent->mg, i) < 0) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 56f317b8a06c..0a28f39de545 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -15,6 +15,7 @@ struct thread { struct map_groups mg; pid_t pid; char shortname[3]; + bool comm_set; char *comm; int comm_len; }; From 87b8d1adefa1548b591cbf0d63965987e2cf893d Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Thu, 18 Feb 2010 16:13:40 -0800 Subject: [PATCH 558/640] mm: Make copy_from_user() in migrate.c statically predictable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit x86-32 has had a static test for copy_from_user() overflow for a while. This test currently fails in mm/migrate.c resulting in an allyesconfig/allmodconfig build failure on x86-32: In function ‘copy_from_user’, inlined from ‘do_pages_stat’ at /home/hpa/kernel/git/mm/migrate.c:1012: /home/hpa/kernel/git/arch/x86/include/asm/uaccess_32.h:212: error: call to ‘copy_from_user_overflow’ declared Make the logic more explicit and therefore easier for gcc to understand. v2: rewrite the loop entirely using a more normal structure for a chunked-data loop (Linus Torvalds) Reported-by: Len Brown Signed-off-by: H. Peter Anvin Reviewed-and-Tested-by: KOSAKI Motohiro Cc: Arjan van de Ven Cc: Andrew Morton Cc: Christoph Lameter Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Linus Torvalds --- mm/migrate.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9a0db5bbabe4..880bd592d38e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1002,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; - unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR; - int err; - for (i = 0; i < nr_pages; i += chunk_nr) { - if (chunk_nr > nr_pages - i) - chunk_nr = nr_pages - i; + while (nr_pages) { + unsigned long chunk_nr; - err = copy_from_user(chunk_pages, &pages[i], - chunk_nr * sizeof(*chunk_pages)); - if (err) { - err = -EFAULT; - goto out; - } + chunk_nr = nr_pages; + if (chunk_nr > DO_PAGES_STAT_CHUNK_NR) + chunk_nr = DO_PAGES_STAT_CHUNK_NR; + + if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages))) + break;
do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - err = copy_to_user(&status[i], chunk_status, - chunk_nr * sizeof(*chunk_status)); - if (err) { - err = -EFAULT; - goto out; - } - } - err = 0; + if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + break; -out: - return err; + pages += chunk_nr; + status += chunk_nr; + nr_pages -= chunk_nr; + } + return nr_pages ? -EFAULT : 0; } /* From 3dae93ec3ee1fceec69f40ef9b97892ce62ba7a5 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 15 Feb 2010 19:32:25 -0500 Subject: [PATCH 559/640] [WATCHDOG] bfin: fix max timeout calculation Relying on overflow/wrap around isn't exact because if you wrap far enough, you get back to "valid" values. Reported-by: Thorsten Pohlmann Signed-off-by: Mike Frysinger Signed-off-by: Wim Van Sebroeck Cc: stable --- drivers/watchdog/bfin_wdt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/watchdog/bfin_wdt.c b/drivers/watchdog/bfin_wdt.c index c7b3f9df2317..2159e668751c 100644 --- a/drivers/watchdog/bfin_wdt.c +++ b/drivers/watchdog/bfin_wdt.c @@ -1,9 +1,8 @@ /* * Blackfin On-Chip Watchdog Driver - * Supports BF53[123]/BF53[467]/BF54[2489]/BF561 * * Originally based on softdog.c - * Copyright 2006-2007 Analog Devices Inc. + * Copyright 2006-2010 Analog Devices Inc. * Copyright 2006-2007 Michele d'Amico * Copyright 1996 Alan Cox * @@ -137,13 +136,15 @@ static int bfin_wdt_running(void) */ static int bfin_wdt_set_timeout(unsigned long t) { - u32 cnt; + u32 cnt, max_t, sclk; unsigned long flags; - stampit(); + sclk = get_sclk(); + max_t = -1 / sclk; + cnt = t * sclk; + stamp("maxtimeout=%us newtimeout=%lus (cnt=%#x)", max_t, t, cnt); - cnt = t * get_sclk(); - if (cnt < get_sclk()) { + if (t > max_t) { printk(KERN_WARNING PFX "timeout value is too large\n"); return -EINVAL; } From 2531be413b3f2f64c0282073de89fe52bbcbbab5 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 21 Feb 2010 18:03:16 -0800 Subject: [PATCH 560/640] sparc32: Fix struct stat uid/gid types. Commit 085219f79cad89291699bd2bfb21c9fdabafe65f ("sparc32: use proper types in struct stat") accidentally changed the struct stat uid/gid members to uid_t and gid_t, but those get set to __kernel_uid32_t and __kernel_gid32_t respectively. Those are of type 'int' but the structure is meant to have 'short'. So use uid16_t and gid16_t to correct this. Reported-by: Rob Landley Signed-off-by: David S. Miller --- arch/sparc/include/asm/stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h index 55db5eca08e2..39327d6a57eb 100644 --- a/arch/sparc/include/asm/stat.h +++ b/arch/sparc/include/asm/stat.h @@ -53,8 +53,8 @@ struct stat { ino_t st_ino; mode_t st_mode; short st_nlink; - uid_t st_uid; - gid_t st_gid; + uid16_t st_uid; + gid16_t st_gid; unsigned short st_rdev; off_t st_size; time_t st_atime; From eb083ba260f21ad79e83e1ad05a0d27e93b58c83 Mon Sep 17 00:00:00 2001 From: Roy Yin Date: Sun, 21 Feb 2010 22:52:49 -0800 Subject: [PATCH 561/640] Input: usbtouchscreen - extend coordinate range for Generaltouch devices Generaltouch protocol allows for coordinates in [0, 0xffff] range and there are devices reporting coordinates as high as 0x7fff so let's update the driver to reflect that. 
Signed-off-by: Roy Yin Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/usbtouchscreen.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index 09a5e7341bd5..5256123a5228 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -618,8 +618,8 @@ static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt) #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { - dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ; - dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ; + dev->x = (pkt[2] << 8) | pkt[1]; + dev->y = (pkt[4] << 8) | pkt[3]; dev->press = pkt[5] & 0xff; dev->touch = pkt[0] & 0x01; @@ -809,9 +809,9 @@ static struct usbtouch_device_info usbtouch_dev_info[] = { #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH [DEVTYPE_GENERAL_TOUCH] = { .min_xc = 0x0, - .max_xc = 0x0500, + .max_xc = 0x7fff, .min_yc = 0x0, - .max_yc = 0x0500, + .max_yc = 0x7fff, .rept_size = 7, .read_data = general_touch_read_data, }, From a239a8b47cc0e5e6d7416a89f340beac06d5edaa Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Fri, 19 Feb 2010 15:47:32 -0800 Subject: [PATCH 562/640] iwlwifi: error checking for number of tfds in queue When receive reply_tx and ready to decrement the count for number of tfds in queue, do error checking to prevent error condition and tfds_in_queue become negative number. Signed-off-by: Wey-Yi Guy Signed-off-by: Reinette Chatre CC: stable@kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-4965.c | 2 +- drivers/net/wireless/iwlwifi/iwl-5000.c | 4 ++-- drivers/net/wireless/iwlwifi/iwl-core.h | 2 ++ drivers/net/wireless/iwlwifi/iwl-tx.c | 16 +++++++++++++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index 9b4b8b5c7574..31462813bac0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -2008,7 +2008,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv, IWL_DEBUG_TX_REPLY(priv, "Retry scheduler reclaim scd_ssn " "%d index %d\n", scd_ssn , index); freed = iwl_tx_queue_reclaim(priv, txq_id, index); - priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; + iwl_free_tfds_in_queue(priv, sta_id, tid, freed); if (priv->mac80211_registered && (iwl_queue_space(&txq->q) > txq->q.low_mark) && diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index de45f308b744..f27c514886a5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -1125,7 +1125,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv, scd_ssn , index, txq_id, txq->swq_id); freed = iwl_tx_queue_reclaim(priv, txq_id, index); - priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; + iwl_free_tfds_in_queue(priv, sta_id, tid, freed); if (priv->mac80211_registered && (iwl_queue_space(&txq->q) > txq->q.low_mark) && @@ -1154,7 +1154,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv, freed = iwl_tx_queue_reclaim(priv, txq_id, index); if (ieee80211_is_data_qos(tx_resp->frame_ctrl)) - priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; + iwl_free_tfds_in_queue(priv, sta_id, tid, freed); if (priv->mac80211_registered && (iwl_queue_space(&txq->q) > txq->q.low_mark)) diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index 675b7df632fc..fe37d6a6bf97 100644 --- 
a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -446,6 +446,8 @@ void iwl_hw_txq_ctx_free(struct iwl_priv *priv); int iwl_hw_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq); int iwl_txq_update_write_ptr(struct iwl_priv *priv, struct iwl_tx_queue *txq); +void iwl_free_tfds_in_queue(struct iwl_priv *priv, + int sta_id, int tid, int freed); int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq, int slots_num, u32 txq_id); void iwl_tx_queue_free(struct iwl_priv *priv, int txq_id); diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 87ce2bd292c7..72136c8f51da 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -120,6 +120,20 @@ int iwl_txq_update_write_ptr(struct iwl_priv *priv, struct iwl_tx_queue *txq) EXPORT_SYMBOL(iwl_txq_update_write_ptr); +void iwl_free_tfds_in_queue(struct iwl_priv *priv, + int sta_id, int tid, int freed) +{ + if (priv->stations[sta_id].tid[tid].tfds_in_queue >= freed) + priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; + else { + IWL_ERR(priv, "free more than tfds_in_queue (%u:%d)\n", + priv->stations[sta_id].tid[tid].tfds_in_queue, + freed); + priv->stations[sta_id].tid[tid].tfds_in_queue = 0; + } +} +EXPORT_SYMBOL(iwl_free_tfds_in_queue); + /** * iwl_tx_queue_free - Deallocate DMA queue. * @txq: Transmit queue to deallocate. 
@@ -1559,7 +1573,7 @@ void iwl_rx_reply_compressed_ba(struct iwl_priv *priv, if (txq->q.read_ptr != (ba_resp_scd_ssn & 0xff)) { /* calculate mac80211 ampdu sw queue to wake */ int freed = iwl_tx_queue_reclaim(priv, scd_flow, index); - priv->stations[sta_id].tid[tid].tfds_in_queue -= freed; + iwl_free_tfds_in_queue(priv, sta_id, tid, freed); if ((iwl_queue_space(&txq->q) > txq->q.low_mark) && priv->mac80211_registered && From a120e912eb51e347f36c71b60a1d13af74d30e83 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 19 Feb 2010 15:47:33 -0800 Subject: [PATCH 563/640] iwlwifi: sanity check before counting number of tfds can be free Check the frame control for ieee80211_is_data_qos() is true before counting the number of tfds can be free, the tfds_in_queue only increment when ieee80211_is_data_qos() is true before transmit; so it should only decrement if the type match. Remove ieee80211_is_data_qos check for frame_ctrl in tx_resp to avoid invalid information pass from uCode. Signed-off-by: Stanislaw Gruszka Signed-off-by: Wey-Yi Guy Signed-off-by: Reinette Chatre CC: stable@kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-5000.c | 6 ++---- drivers/net/wireless/iwlwifi/iwl-tx.c | 6 +++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index f27c514886a5..cffaae772d51 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -1153,16 +1153,14 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv, tx_resp->failure_frame); freed = iwl_tx_queue_reclaim(priv, txq_id, index); - if (ieee80211_is_data_qos(tx_resp->frame_ctrl)) - iwl_free_tfds_in_queue(priv, sta_id, tid, freed); + iwl_free_tfds_in_queue(priv, sta_id, tid, freed); if (priv->mac80211_registered && (iwl_queue_space(&txq->q) > txq->q.low_mark)) iwl_wake_queue(priv, txq_id); } - if (ieee80211_is_data_qos(tx_resp->frame_ctrl)) - iwl_txq_check_empty(priv, sta_id, tid, txq_id); + iwl_txq_check_empty(priv, sta_id, tid, txq_id); if (iwl_check_bits(status, TX_ABORT_REQUIRED_MSK)) IWL_ERR(priv, "TODO: Implement Tx ABORT REQUIRED!!!\n"); diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 72136c8f51da..8f4071562857 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -1145,6 +1145,7 @@ int iwl_tx_queue_reclaim(struct iwl_priv *priv, int txq_id, int index) struct iwl_queue *q = &txq->q; struct iwl_tx_info *tx_info; int nfreed = 0; + struct ieee80211_hdr *hdr; if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) { IWL_ERR(priv, "Read index for DMA queue txq id (%d), index %d, " @@ -1159,13 +1160,16 @@ int iwl_tx_queue_reclaim(struct iwl_priv *priv, int txq_id, int index) tx_info = &txq->txb[txq->q.read_ptr]; iwl_tx_status(priv, tx_info->skb[0]); + + hdr = (struct ieee80211_hdr *)tx_info->skb[0]->data; + if (hdr && ieee80211_is_data_qos(hdr->frame_control)) + nfreed++; tx_info->skb[0] = NULL; if (priv->cfg->ops->lib->txq_inval_byte_cnt_tbl) 
priv->cfg->ops->lib->txq_inval_byte_cnt_tbl(priv, txq); priv->cfg->ops->lib->txq_free_tfd(priv, txq); - nfreed++; } return nfreed; } From 61caf87cb5c2a198966018343a6ce4c5ab6cf8df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Feb 2010 23:06:27 +0100 Subject: [PATCH 564/640] i915 / PM: Fix hibernate regression caused by suspend/resume splitting Commit 84b79f8d2882b0a84330c04839ed4d3cefd2ff77 (drm/i915: Fix crash while aborting hibernation) attempted to fix a regression introduced by commit cbda12d77ea590082edb6d30bd342a67ebc459e0 (drm/i915: implement new pm ops for i915), but it went too far trying to split the freeze/suspend and resume/thaw parts of the code. As a result, it introduced another regression, which only is visible on some systems. Fix the problem by merging i915_drm_suspend() with i915_drm_freeze() and moving some code from i915_resume() into i915_drm_thaw(), so that intel_opregion_free() and intel_opregion_init() are also executed in the freeze and thaw code paths, respectively. Signed-off-by: Rafael J. 
Wysocki Reported-and-tested-by: Pedro Ribeiro Tested-by: Tino Keitel Acked-by: Eric Anholt Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/i915_drv.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 79beffcf5936..cf4cb3e9a0c2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -176,6 +176,8 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static int i915_drm_freeze(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; + pci_save_state(dev->pdev); /* If KMS is active, we do the leavevt stuff here */ @@ -191,17 +193,12 @@ static int i915_drm_freeze(struct drm_device *dev) i915_save_state(dev); - return 0; -} - -static void i915_drm_suspend(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - intel_opregion_free(dev, 1); /* Modeset on resume, not lid events */ dev_priv->modeset_on_lid = 0; + + return 0; } static int i915_suspend(struct drm_device *dev, pm_message_t state) @@ -221,8 +218,6 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state) if (error) return error; - i915_drm_suspend(dev); - if (state.event == PM_EVENT_SUSPEND) { /* Shut down the device */ pci_disable_device(dev->pdev); @@ -237,6 +232,10 @@ static int i915_drm_thaw(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int error = 0; + i915_restore_state(dev); + + intel_opregion_init(dev, 1); + /* KMS EnterVT equivalent */ if (drm_core_check_feature(dev, DRIVER_MODESET)) { mutex_lock(&dev->struct_mutex); @@ -263,10 +262,6 @@ static int i915_resume(struct drm_device *dev) pci_set_master(dev->pdev); - i915_restore_state(dev); - - intel_opregion_init(dev, 1); - return i915_drm_thaw(dev); } @@ -423,8 +418,6 @@ static int i915_pm_suspend(struct device *dev) if (error) return error; - i915_drm_suspend(drm_dev); - pci_disable_device(pdev); 
pci_set_power_state(pdev, PCI_D3hot); @@ -464,13 +457,8 @@ static int i915_pm_poweroff(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); - int error; - error = i915_drm_freeze(drm_dev); - if (!error) - i915_drm_suspend(drm_dev); - - return error; + return i915_drm_freeze(drm_dev); } const struct dev_pm_ops i915_pm_ops = { From d306ebc28649b89877a22158fe0076f06cc46f60 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 10:35:31 -0800 Subject: [PATCH 565/640] ACPI: Be in TS_POLLING state during mwait based C-state entry ACPI deep C-state entry had a long standing bug/missing feature, wherein we were sending resched IPIs when an idle CPU is in mwait based deep C-state. Only mwait based C1 was using the write to the monitored address to wake up mwait'ing CPU. This patch changes the code to retain TS_POLLING bit if we are entering an mwait based deep C-state. The patch has been verified to reduce the number of resched IPIs in general and also improves the performance/power on workloads with low system utilization (i.e., when mwait based deep C-states are being used). 
Fixes "netperf ~50% regression with 2.6.33-rc1, bisect to 1b9508f" http://marc.info/?l=linux-kernel&m=126441481427331&w=4 Reported-by: Lin Ming Tested-by: Alex Shi Signed-off-by: Venkatesh Pallipadi Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e88e8ae04fdb..cc978a8c00b7 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -880,12 +880,14 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, return(acpi_idle_enter_c1(dev, state)); local_irq_disable(); - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we test - * NEED_RESCHED: - */ - smp_mb(); + if (cx->entry_method != ACPI_CSTATE_FFH) { + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we test + * NEED_RESCHED: + */ + smp_mb(); + } if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; @@ -965,12 +967,14 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, } local_irq_disable(); - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we test - * NEED_RESCHED: - */ - smp_mb(); + if (cx->entry_method != ACPI_CSTATE_FFH) { + current_thread_info()->status &= ~TS_POLLING; + /* + * TS_POLLING-cleared state must be visible before we test + * NEED_RESCHED: + */ + smp_mb(); + } if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; From 15cbf627abcd93c3c668d5a92d58d9fec8f953dd Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 22 Feb 2010 10:43:43 -0800 Subject: [PATCH 566/640] Revert "parisc: HAVE_ARCH_TRACEHOOK" This reverts commit 81bf550d9cdfe0325eb1504b06c9f6511b442c1a. HAVE_ARCH_TRACEHOOK requires defining the user_regset interfaces, including task_user_regset_view(). 
parisc doesn't do that yet, so don't lie about it. Signed-off-by: Roland McGrath --- arch/parisc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 524d9352f17e..f388dc68f605 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -18,7 +18,6 @@ config PARISC select BUG select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT - select HAVE_ARCH_TRACEHOOK help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, From 52ab320ac560af3333191a473e56615fb48fff95 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Sat, 20 Feb 2010 21:23:22 +0900 Subject: [PATCH 567/640] MIPS: Highmem: Fix build error arch/mips/mm/highmem.c: In function 'kmap_init': arch/mips/mm/highmem.c:130: error: 'init_mm' undeclared (first use in this function) arch/mips/mm/highmem.c:130: error: (Each undeclared identifier is reported only once arch/mips/mm/highmem.c:130: error: for each function it appears in.) Signed-off-by: Yoichi Yuasa Cc: linux-mips Patchwork: http://patchwork.linux-mips.org/patch/980/ Signed-off-by: Ralf Baechle --- arch/mips/mm/highmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index e274fda329f4..127d732474bf 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include From 84a6fcb368a080620d12fc4d79e07902dbee7335 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 20 Feb 2010 19:51:20 +0100 Subject: [PATCH 568/640] MIPS: BCM47xx: Fix 128MB RAM support Ignoring the last page when ddr size is 128M. Cached accesses to last page is causing the processor to prefetch using address above 128M stepping out of the DDR address space. 
Signed-off-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/981/ Signed-off-by: Ralf Baechle --- arch/mips/bcm47xx/prom.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c index c51405e57921..29d3cbf9555f 100644 --- a/arch/mips/bcm47xx/prom.c +++ b/arch/mips/bcm47xx/prom.c @@ -141,6 +141,14 @@ static __init void prom_init_mem(void) break; } + /* Ignoring the last page when ddr size is 128M. Cached + * accesses to last page is causing the processor to prefetch + * using address above 128M stepping out of the ddr address + * space. + */ + if (mem == 0x8000000) + mem -= 0x1000; + add_memory_region(0, mem, BOOT_MEM_RAM); } From e7e65caefd57913260c82fb751f07655671ec47e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 10 Feb 2010 19:25:58 +0100 Subject: [PATCH 569/640] drm/nouveau: Fix up pre-nv17 analog load detection. Signed-off-by: Francisco Jerez Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv04_dac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c index d0e038d28948..1d73b15d70da 100644 --- a/drivers/gpu/drm/nouveau/nv04_dac.c +++ b/drivers/gpu/drm/nouveau/nv04_dac.c @@ -119,7 +119,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) { struct drm_device *dev = encoder->dev; - uint8_t saved_seq1, saved_pi, saved_rpc1; + uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode; uint8_t saved_palette0[3], saved_palette_mask; uint32_t saved_rtest_ctrl, saved_rgen_ctrl; int i; @@ -135,6 +135,9 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder, /* only implemented for head A for now */ NVSetOwner(dev, 0); + saved_cr_mode = NVReadVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX); + NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode | 0x80); + saved_seq1 = 
NVReadVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX); NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1 & ~0x20); @@ -203,6 +206,7 @@ out: NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_PIXEL_INDEX, saved_pi); NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_RPC1_INDEX, saved_rpc1); NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1); + NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode); if (blue == 0x18) { NV_INFO(dev, "Load detected on head A\n"); From 66b6ebaccb176a2068bbe328f162614dce524621 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 10:23:30 +1000 Subject: [PATCH 570/640] drm/nv50: make nv50_mem_vm_{bind,unbind} operate only on vram GART is handled elsewhere, no reason to have the code for it here too. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 43 ++++++++------------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 8f3a12f614ed..04885d2fb15f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -285,53 +285,36 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_gpuobj **pgt; - unsigned psz, pfl, pages; + unsigned pages; - if (virt >= dev_priv->vm_gart_base && - (virt + size) < (dev_priv->vm_gart_base + dev_priv->vm_gart_size)) { - psz = 12; - pgt = &dev_priv->gart_info.sg_ctxdma; - pfl = 0x21; - virt -= dev_priv->vm_gart_base; - } else - if (virt >= dev_priv->vm_vram_base && - (virt + size) < (dev_priv->vm_vram_base + dev_priv->vm_vram_size)) { - psz = 16; - pgt = dev_priv->vm_vram_pt; - pfl = 0x01; - virt -= dev_priv->vm_vram_base; - } else { - NV_ERROR(dev, "Invalid address: 0x%16llx-0x%16llx\n", - virt, virt + size - 1); - return -EINVAL; - } - - pages = size >> psz; + virt -= dev_priv->vm_vram_base; + pages = size >> 16; 
dev_priv->engine.instmem.prepare_access(dev, true); if (flags & 0x80000000) { while (pages--) { - struct nouveau_gpuobj *pt = pgt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1; + struct nouveau_gpuobj *pt = + dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; nv_wo32(dev, pt, pte++, 0x00000000); nv_wo32(dev, pt, pte++, 0x00000000); - virt += (1 << psz); + virt += (1 << 16); } } else { while (pages--) { - struct nouveau_gpuobj *pt = pgt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1; + struct nouveau_gpuobj *pt = + dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; unsigned offset_h = upper_32_bits(phys) & 0xff; unsigned offset_l = lower_32_bits(phys); - nv_wo32(dev, pt, pte++, offset_l | pfl); + nv_wo32(dev, pt, pte++, offset_l | 1); nv_wo32(dev, pt, pte++, offset_h | flags); - phys += (1 << psz); - virt += (1 << psz); + phys += (1 << 16); + virt += (1 << 16); } } dev_priv->engine.instmem.finish_access(dev); From 4c27bd339d226175ac0e4dc3ab8289ba696db8be Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 10:25:53 +1000 Subject: [PATCH 571/640] drm/nv50: more efficient clearing of gpu page table entries Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 68 +++++++++++++++++---------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 04885d2fb15f..6832c4c969a3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -291,31 +291,17 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, pages = size >> 16; dev_priv->engine.instmem.prepare_access(dev, true); - if (flags & 0x80000000) { - while (pages--) { - struct nouveau_gpuobj *pt = - dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; + while (pages--) { + struct 
nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29]; + unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; + unsigned offset_h = upper_32_bits(phys) & 0xff; + unsigned offset_l = lower_32_bits(phys); - nv_wo32(dev, pt, pte++, 0x00000000); - nv_wo32(dev, pt, pte++, 0x00000000); + nv_wo32(dev, pt, pte++, offset_l | 1); + nv_wo32(dev, pt, pte++, offset_h | flags); - virt += (1 << 16); - } - } else { - while (pages--) { - struct nouveau_gpuobj *pt = - dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; - unsigned offset_h = upper_32_bits(phys) & 0xff; - unsigned offset_l = lower_32_bits(phys); - - nv_wo32(dev, pt, pte++, offset_l | 1); - nv_wo32(dev, pt, pte++, offset_h | flags); - - phys += (1 << 16); - virt += (1 << 16); - } + phys += (1 << 16); + virt += (1 << 16); } dev_priv->engine.instmem.finish_access(dev); @@ -339,7 +325,41 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, void nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size) { - nv50_mem_vm_bind_linear(dev, virt, size, 0x80000000, 0); + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpuobj *pgt; + unsigned pages, pte, end; + + virt -= dev_priv->vm_vram_base; + pages = (size >> 16) << 1; + + dev_priv->engine.instmem.prepare_access(dev, true); + while (pages) { + pgt = dev_priv->vm_vram_pt[virt >> 29]; + pte = (virt & 0x1ffe0000ULL) >> 15; + + end = pte + pages; + if (end > 16384) + end = 16384; + pages -= (end - pte); + virt += (end - pte) << 15; + + while (pte < end) + nv_wo32(dev, pgt, pte++, 0); + } + dev_priv->engine.instmem.finish_access(dev); + + nv_wr32(dev, 0x100c80, 0x00050001); + if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) { + NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n"); + NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80)); + return; + } + + nv_wr32(dev, 0x100c80, 0x00000001); + if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) { + NV_ERROR(dev, "timeout: 
(0x100c80 & 1) == 0 (2)\n"); + NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80)); + } } /* From 531e77139f26e8da32ee694b9ee5e6f4c764f1db Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Feb 2010 11:31:44 +1000 Subject: [PATCH 572/640] drm/nv50: improve vram page table construction This commit changes nouveau to construct PTEs which look very much like the ones the binary driver creates. I presume that filling multiple PTEs identically with length flags and the physical address of the start of a block of VRAM is a hint to the memory controller that it need not perform additional page table lookups for that range of addresses. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 44 ++++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 6832c4c969a3..134fedbb7669 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -285,23 +285,45 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys) { struct drm_nouveau_private *dev_priv = dev->dev_private; - unsigned pages; + struct nouveau_gpuobj *pgt; + unsigned block; + int i; - virt -= dev_priv->vm_vram_base; - pages = size >> 16; + virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1; + size = (size >> 16) << 1; + phys |= ((uint64_t)flags << 32) | 1; dev_priv->engine.instmem.prepare_access(dev, true); - while (pages--) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; - unsigned offset_h = upper_32_bits(phys) & 0xff; + while (size) { + unsigned offset_h = upper_32_bits(phys); unsigned offset_l = lower_32_bits(phys); + unsigned pte, end; - nv_wo32(dev, pt, pte++, offset_l | 1); - nv_wo32(dev, pt, pte++, offset_h | flags); + for (i = 7; i >= 0; i--) { + block = 1 << (i + 1); + if (size >= block && !(virt & 
(block - 1))) + break; + } + offset_l |= (i << 7); - phys += (1 << 16); - virt += (1 << 16); + phys += block << 15; + size -= block; + + while (block) { + pgt = dev_priv->vm_vram_pt[virt >> 14]; + pte = virt & 0x3ffe; + + end = pte + block; + if (end > 16384) + end = 16384; + block -= (end - pte); + virt += (end - pte); + + while (pte < end) { + nv_wo32(dev, pgt, pte++, offset_l); + nv_wo32(dev, pgt, pte++, offset_h); + } + } } dev_priv->engine.instmem.finish_access(dev); From 76befb8c30cebe2af83fa346bdaf75b430893511 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 20 Feb 2010 08:06:36 +1000 Subject: [PATCH 573/640] drm/nv50: fix instmem binding on IGPs to point at stolen system memory This also modifies the unused PRAMIN PT entries to be all zeroes, can't really recall why I used 9/0 initially, just that it didn't work for some reason. It was likely masking a bug elsewhere that's since been fixed. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drv.h | 1 + drivers/gpu/drm/nouveau/nv50_instmem.c | 58 ++++++++++++++++++-------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 5445cefdd03e..1c15ef37b71c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -583,6 +583,7 @@ struct drm_nouveau_private { uint64_t vm_end; struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR]; int vm_vram_pt_nr; + uint64_t vram_sys_base; /* the mtrr covering the FB */ int fb_mtrr; diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index 94400f777e7f..f0dc4e36ef05 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -76,6 +76,11 @@ nv50_instmem_init(struct drm_device *dev) for (i = 0x1700; i <= 0x1710; i += 4) priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i); + if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) + dev_priv->vram_sys_base = 
nv_rd32(dev, 0x100e10) << 12; + else + dev_priv->vram_sys_base = 0; + /* Reserve the last MiB of VRAM, we should probably try to avoid * setting up the below tables over the top of the VBIOS image at * some point. @@ -172,16 +177,28 @@ nv50_instmem_init(struct drm_device *dev) * We map the entire fake channel into the start of the PRAMIN BAR */ ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000, - 0, &priv->pramin_pt); + 0, &priv->pramin_pt); if (ret) return ret; - for (i = 0, v = c_offset; i < pt_size; i += 8, v += 0x1000) { - if (v < (c_offset + c_size)) - BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v | 1); - else - BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000009); + v = c_offset | 1; + if (dev_priv->vram_sys_base) { + v += dev_priv->vram_sys_base; + v |= 0x30; + } + + i = 0; + while (v < dev_priv->vram_sys_base + c_offset + c_size) { + BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v); BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000); + v += 0x1000; + i += 8; + } + + while (i < pt_size) { + BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000); + BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000); + i += 8; } BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63); @@ -416,7 +433,9 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv; - uint32_t pte, pte_end, vram; + struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj; + uint32_t pte, pte_end; + uint64_t vram; if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound) return -EINVAL; @@ -424,20 +443,24 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) NV_DEBUG(dev, "st=0x%0llx sz=0x%0llx\n", gpuobj->im_pramin->start, gpuobj->im_pramin->size); - pte = (gpuobj->im_pramin->start >> 12) << 3; - pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte; + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = 
((gpuobj->im_pramin->size >> 12) << 1) + pte; vram = gpuobj->im_backing_start; NV_DEBUG(dev, "pramin=0x%llx, pte=%d, pte_end=%d\n", gpuobj->im_pramin->start, pte, pte_end); NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start); + vram |= 1; + if (dev_priv->vram_sys_base) { + vram += dev_priv->vram_sys_base; + vram |= 0x30; + } + dev_priv->engine.instmem.prepare_access(dev, true); while (pte < pte_end) { - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, vram | 1); - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000); - - pte += 8; + nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram)); + nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram)); vram += NV50_INSTMEM_PAGE_SIZE; } dev_priv->engine.instmem.finish_access(dev); @@ -470,14 +493,13 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) if (gpuobj->im_bound == 0) return -EINVAL; - pte = (gpuobj->im_pramin->start >> 12) << 3; - pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte; + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte; dev_priv->engine.instmem.prepare_access(dev, true); while (pte < pte_end) { - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, 0x00000009); - nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000); - pte += 8; + nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); + nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); } dev_priv->engine.instmem.finish_access(dev); From 6c42966768b0254f465a8f451333795283f53d22 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 20 Feb 2010 08:10:11 +1000 Subject: [PATCH 574/640] drm/nv50: fix vram ptes on IGPs to point at stolen system memory Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_mem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 134fedbb7669..2dc09dbd817d 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -291,7 +291,13 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1; size = (size >> 16) << 1; - phys |= ((uint64_t)flags << 32) | 1; + + phys |= ((uint64_t)flags << 32); + phys |= 1; + if (dev_priv->vram_sys_base) { + phys += dev_priv->vram_sys_base; + phys |= 0x30; + } dev_priv->engine.instmem.prepare_access(dev, true); while (size) { From 5a2d41961dd6815b874b5c0afec0ac96cd90eea4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 22 Feb 2010 12:44:14 -0800 Subject: [PATCH 575/640] memcg: fix oom killing a child process in an other cgroup Presently the oom-killer is memcg aware and it finds the worst process from processes under memcg(s) in oom. Then, it kills victim's child first. It may kill a child in another cgroup and may not be any help for recovery. And it will break the assumption users have. This patch fixes it. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Balbir Singh Reviewed-by: Daisuke Nishimura Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52481b1c1e5..237050478f28 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, list_for_each_entry(c, &p->children, sibling) { if (c->mm == p->mm) continue; + if (mem && !task_in_mem_cgroup(c, mem)) + continue; if (!oom_kill_task(c)) return 0; } From 701188374b6f1ef9cf7e4dce4a2e69ef4c0012ac Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 22 Feb 2010 12:44:16 -0800 Subject: [PATCH 576/640] kernel/sys.c: fix missing rcu protection for sys_getpriority() find_task_by_vpid() is not safe without rcu_read_lock(). 
2.6.33-rc7 got RCU protection for sys_setpriority() but missed it for sys_getpriority(). Signed-off-by: Tetsuo Handa Cc: Oleg Nesterov Cc: "Paul E. McKenney" Acked-by: Serge Hallyn Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index 26a6b73a6b85..18bde979f346 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) if (which > PRIO_USER || which < PRIO_PROCESS) return -EINVAL; + rcu_read_lock(); read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: @@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) } out_unlock: read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } From d2e7276b6b5e4bc2148891a056d5862c5314342d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2010 12:44:19 -0800 Subject: [PATCH 577/640] idr: fix a critical misallocation bug, take#2 This is retry of reverted 859ddf09743a8cc680af33f7259ccd0fd36bfe9d ("idr: fix a critical misallocation bug") which contained two bugs. * pa[idp->layers] should be cleared even if it's not used by sub_alloc() because it's used by mark idr_mark_full(). * The original condition check also assigned pa[l] to p which the new code didn't do thus leaving p pointing at the wrong layer. Both problems have been fixed and the idr code has received good amount testing using userland testing setup where simple bitmap allocator is run parallel to verify the result of idr allocation. The bug this patch fixes is caused by sub_alloc() optimization path bypassing out-of-room condition check and restarting allocation loop with starting value higher than maximum allowed value. For detailed description, please read commit message of 859ddf09. 
Signed-off-by: Tejun Heo Based-on-patch-from: Eric Paris Reported-by: Eric Paris Tested-by: Stefan Lippers-Hollmann Tested-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/idr.c b/lib/idr.c index 1cac726c44bc..0dc782216d4b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -156,10 +156,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; /* if already at the top layer, we need to grow */ - if (!(p = pa[l])) { + if (id >= 1 << (idp->layers * IDR_BITS)) { *starting_id = id; return IDR_NEED_TO_GROW; } + p = pa[l]; + BUG_ON(!p); /* If we need to go up one layer, continue the * loop; otherwise, restart from the top. From 115079aad949cba31755eb4e2576edba7fddfdbc Mon Sep 17 00:00:00 2001 From: Jens Rottmann Date: Mon, 22 Feb 2010 12:44:20 -0800 Subject: [PATCH 578/640] geode-mfgpt: restore previous behavior for selecting IRQ geode-mfgpt: restore previous behavior for selecting IRQ The MFGPT IRQ used to be, in order of decreasing priority, * IRQ supplied by the user as a boot-time parameter, * IRQ previously set by the BIOS or another driver, * default IRQ given at compile time. Return to this behavior, which got broken when splitting the MFGPT/clocksource driver for 2.6.33-rc1. 
Signed-off-by: Jens Rottmann Acked-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: john stultz Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/clocksource/cs5535-clockevt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index 27d20fac19d1..b314a999aabe 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -21,7 +21,7 @@ #define DRV_NAME "cs5535-clockevt" -static int timer_irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; +static int timer_irq; module_param_named(irq, timer_irq, int, 0644); MODULE_PARM_DESC(irq, "Which IRQ to use for the clock source MFGPT ticks."); From 89f3f2199084a160a3a45fa6d9af235696321758 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 22 Feb 2010 12:44:22 -0800 Subject: [PATCH 579/640] efifb: fix framebuffer handoff Commit 4410f3910947dcea8672280b3adecd53cec4e85e ("fbdev: add support for handoff from firmware to hw framebuffers") didn't add fb_destroy operation to efifb. Fix it and change aperture_size to match size passed to request_mem_region. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=15151 Signed-off-by: Marcin Slusarz Reported-by: Alex Zhavnerchik Tested-by: Alex Zhavnerchik Acked-by: Peter Jones Cc: Huang Ying Cc: Dave Airlie Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/efifb.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c index eb12182b2059..d25df51bb0d2 100644 --- a/drivers/video/efifb.c +++ b/drivers/video/efifb.c @@ -161,8 +161,17 @@ static int efifb_setcolreg(unsigned regno, unsigned red, unsigned green, return 0; } +static void efifb_destroy(struct fb_info *info) +{ + if (info->screen_base) + iounmap(info->screen_base); + release_mem_region(info->aperture_base, info->aperture_size); + framebuffer_release(info); +} + static struct fb_ops efifb_ops = { .owner = THIS_MODULE, + .fb_destroy = efifb_destroy, .fb_setcolreg = efifb_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, @@ -281,7 +290,7 @@ static int __init efifb_probe(struct platform_device *dev) info->par = NULL; info->aperture_base = efifb_fix.smem_start; - info->aperture_size = size_total; + info->aperture_size = size_remap; info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len); if (!info->screen_base) { From a17e18790a8c47113a73139d54a375dc9ccd8f08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 22 Feb 2010 12:44:24 -0800 Subject: [PATCH 580/640] fs/exec.c: fix initial stack reservation 803bf5ec259941936262d10ecc84511b76a20921 ("fs/exec.c: restrict initial stack space expansion to rlimit") attempts to limit the initial stack to 20*PAGE_SIZE. Unfortunately, in attempting ensure the stack is not reduced in size, we ended up not changing the stack at all. This size reduction check is not necessary as the expand_stack call does this already. This caused a regression in UML resulting in most guest processes being killed. 
Signed-off-by: Michael Neuling Reviewed-by: KOSAKI Motohiro Acked-by: WANG Cong Cc: Anton Blanchard Cc: Oleg Nesterov Cc: James Morris Cc: Serge Hallyn Cc: Benjamin Herrenschmidt Cc: Jouni Malinen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index e95c692ef0e4..cce6bbdbdbb1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -637,7 +637,6 @@ int setup_arg_pages(struct linux_binprm *bprm, * will align it up. */ rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; - rlim_stack = min(rlim_stack, stack_size); #ifdef CONFIG_STACK_GROWSUP if (stack_size + stack_expand > rlim_stack) stack_base = vma->vm_start + rlim_stack; From 4e4ddd47774313accc86b233d6ca2c6a9037a671 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Sun, 21 Feb 2010 14:54:55 +0000 Subject: [PATCH 581/640] drm/vmwgfx: Fix queries if no dma buffer thrashing is occuring. Intercept query commands and apply relocations to their guest pointers. 
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 108 ++++++++++++++++++++---- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index d69caf92ffe7..0897359b3e4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -182,25 +182,19 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv, return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid); } -static int vmw_cmd_dma(struct vmw_private *dev_priv, - struct vmw_sw_context *sw_context, - SVGA3dCmdHeader *header) +static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGAGuestPtr *ptr, + struct vmw_dma_buffer **vmw_bo_p) { - uint32_t handle; struct vmw_dma_buffer *vmw_bo = NULL; struct ttm_buffer_object *bo; - struct vmw_surface *srf = NULL; - struct vmw_dma_cmd { - SVGA3dCmdHeader header; - SVGA3dCmdSurfaceDMA dma; - } *cmd; + uint32_t handle = ptr->gmrId; struct vmw_relocation *reloc; - int ret; uint32_t cur_validate_node; struct ttm_validate_buffer *val_buf; + int ret; - cmd = container_of(header, struct vmw_dma_cmd, header); - handle = cmd->dma.guest.ptr.gmrId; ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); @@ -209,14 +203,14 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, bo = &vmw_bo->base; if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) { - DRM_ERROR("Max number of DMA commands per submission" + DRM_ERROR("Max number relocations per submission" " exceeded\n"); ret = -EINVAL; goto out_no_reloc; } reloc = &sw_context->relocs[sw_context->cur_reloc++]; - reloc->location = &cmd->dma.guest.ptr; + reloc->location = ptr; cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf); if (unlikely(cur_validate_node >= 
VMWGFX_MAX_GMRS)) { @@ -234,7 +228,89 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, list_add_tail(&val_buf->head, &sw_context->validate_nodes); ++sw_context->cur_val_buf; } + *vmw_bo_p = vmw_bo; + return 0; +out_no_reloc: + vmw_dmabuf_unreference(&vmw_bo); + vmw_bo_p = NULL; + return ret; +} + +static int vmw_cmd_end_query(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_dma_buffer *vmw_bo; + struct vmw_query_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdEndQuery q; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_query_cmd, header); + ret = vmw_cmd_cid_check(dev_priv, sw_context, header); + if (unlikely(ret != 0)) + return ret; + + ret = vmw_translate_guest_ptr(dev_priv, sw_context, + &cmd->q.guestResult, + &vmw_bo); + if (unlikely(ret != 0)) + return ret; + + vmw_dmabuf_unreference(&vmw_bo); + return 0; +} + +static int vmw_cmd_wait_query(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_dma_buffer *vmw_bo; + struct vmw_query_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdWaitForQuery q; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_query_cmd, header); + ret = vmw_cmd_cid_check(dev_priv, sw_context, header); + if (unlikely(ret != 0)) + return ret; + + ret = vmw_translate_guest_ptr(dev_priv, sw_context, + &cmd->q.guestResult, + &vmw_bo); + if (unlikely(ret != 0)) + return ret; + + vmw_dmabuf_unreference(&vmw_bo); + return 0; +} + + +static int vmw_cmd_dma(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct vmw_dma_buffer *vmw_bo = NULL; + struct ttm_buffer_object *bo; + struct vmw_surface *srf = NULL; + struct vmw_dma_cmd { + SVGA3dCmdHeader header; + SVGA3dCmdSurfaceDMA dma; + } *cmd; + int ret; + + cmd = container_of(header, struct vmw_dma_cmd, header); + ret = vmw_translate_guest_ptr(dev_priv, sw_context, + &cmd->dma.guest.ptr, + &vmw_bo); + if 
(unlikely(ret != 0)) + return ret; + + bo = &vmw_bo->base; ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile, cmd->dma.host.sid, &srf); if (ret) { @@ -379,8 +455,8 @@ static vmw_cmd_func vmw_cmd_funcs[SVGA_3D_CMD_MAX] = { VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw), VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check), VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check), - VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_cid_check), - VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_cid_check), + VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_end_query), + VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_wait_query), VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok), VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN, &vmw_cmd_blt_surf_screen_check) From 79da0644a8e0838522828f106e4049639eea6baf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Feb 2010 08:40:43 +0100 Subject: [PATCH 582/640] Revert "block: improve queue_should_plug() by looking at IO depths" This reverts commit fb1e75389bd06fd5987e9cda1b4e0305c782f854. "Benjamin S." reports that the patch in question causes a big drop in sequential throughput for him, dropping from 200MB/sec down to only 70MB/sec. Needs to be investigated more fully, for now lets just revert the offending commit. 
Conflicts: include/linux/blkdev.h Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++--------- include/linux/blkdev.h | 4 +--- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 718897e6d37f..d1a9a0a64f95 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) */ static inline bool queue_should_plug(struct request_queue *q) { - return !(blk_queue_nonrot(q) && blk_queue_queuing(q)); + return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); } static int __make_request(struct request_queue *q, struct bio *bio) @@ -1859,15 +1859,8 @@ void blk_dequeue_request(struct request *rq) * and to it is freed is accounted as io that is in progress at * the driver side. */ - if (blk_account_rq(rq)) { + if (blk_account_rq(rq)) q->in_flight[rq_is_sync(rq)]++; - /* - * Mark this device as supporting hardware queuing, if - * we have more IOs in flight than 4. - */ - if (!blk_queue_queuing(q) && queue_in_flight(q) > 4) - set_bit(QUEUE_FLAG_CQ, &q->queue_flags); - } } /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c8018977efa..1896e868854f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -461,8 +461,7 @@ struct request_queue #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_CQ 16 /* hardware does queuing */ -#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ +#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -586,7 +585,6 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) -#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define 
blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) From b5abb028e214cca68f4231d4f3bc0847ddbc986e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 19 Feb 2010 17:54:53 +0000 Subject: [PATCH 583/640] e1000: Fix DMA mapping error handling on RX Check for error return from pci_map_single/pci_map_page and clean up. With this and the previous patch the driver was able to handle a significant percentage of errors (I set the fault injection rate to 10% and could still download large files at a reasonable speed). Signed-off-by: Anton Blanchard Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index d29bb532eccf..765543663a4f 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -4006,11 +4006,21 @@ check_page: } } - if (!buffer_info->dma) + if (!buffer_info->dma) { buffer_info->dma = pci_map_page(pdev, buffer_info->page, 0, buffer_info->length, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { + put_page(buffer_info->page); + dev_kfree_skb(skb); + buffer_info->page = NULL; + buffer_info->skb = NULL; + buffer_info->dma = 0; + adapter->alloc_rx_buff_failed++; + break; /* while !buffer_info->skb */ + } + } rx_desc = E1000_RX_DESC(*rx_ring, i); rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); @@ -4101,6 +4111,13 @@ map_skb: skb->data, buffer_info->length, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { + dev_kfree_skb(skb); + buffer_info->skb = NULL; + buffer_info->dma = 0; + adapter->alloc_rx_buff_failed++; + break; /* while !buffer_info->skb */ + } /* * XXX if it was allocated cleanly it will never map to a From 
f5ca8502f70ccc77008b7bee671f5301995240a4 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 22 Feb 2010 22:34:54 +0000 Subject: [PATCH 584/640] MAINTAINERS: update mv643xx_eth maintenance status I am no longer with Marvell. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 412eff60c33d..318d2e417168 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3490,9 +3490,9 @@ S: Maintained F: drivers/net/wireless/libertas/ MARVELL MV643XX ETHERNET DRIVER -M: Lennert Buytenhek +M: Lennert Buytenhek L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/mv643xx_eth.* F: include/linux/mv643xx.h From e79dc48431e7731f5bb6bab8f6b499fe03802ca0 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 22 Feb 2010 12:27:21 +0000 Subject: [PATCH 585/640] IPv6: better document max_addresses parameter Andrew Morton wrote: >> >From ip-sysctl.txt file in kernel documentation I can see following description >> for max_addresses: >> max_addresses - INTEGER >> Number of maximum addresses per interface. 0 disables limitation. >> It is recommended not set too large value (or 0) because it would >> be too easy way to crash kernel to allow to create too much of >> autoconfigured addresses. ^^^^^^^^^^^^^^ >> If this parameter applies only for auto-configured IP addressed, please state >> it more clearly in docs or rename the parameter to show that it refers to >> auto-configuration. It did mention autoconfigured in the text, but the below makes it more obvious. More clearly document IPv6 max_addresses parameter. Signed-off-by: Brian Haley Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 006b39dec87d..e87f3cdc8a6a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1074,10 +1074,10 @@ regen_max_retry - INTEGER Default: 5 max_addresses - INTEGER - Number of maximum addresses per interface. 0 disables limitation. - It is recommended not set too large value (or 0) because it would - be too easy way to crash kernel to allow to create too much of - autoconfigured addresses. + Maximum number of autoconfigured addresses per interface. Setting + to zero disables the limitation. It is not recommended to set this + value too large (or to zero) because it would be an easy way to + crash the kernel by allowing too many addresses to be created. Default: 16 disable_ipv6 - BOOLEAN From cac43a1b7b091b17113502e4128dcb0ff7e3503d Mon Sep 17 00:00:00 2001 From: Torgny Johansson Date: Fri, 19 Feb 2010 01:59:15 +0000 Subject: [PATCH 586/640] cdc_ether: new PID for Ericsson C3607w to the whitelist (resubmit) This patch adds a new vid/pid to the cdc_ether whitelist. Device added: - Ericsson Mobile Broadband variant C3607w Signed-off-by: Torgny Johansson -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_ether.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 4f27f022fbf7..5f3b9eaeb04f 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -583,6 +583,11 @@ static const struct usb_device_id products [] = { USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1049, USB_CLASS_COMM, USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &mbm_info, +}, { + /* Ericsson C3607w ver 2 */ + USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x190b, USB_CLASS_COMM, + USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long) &mbm_info, }, { /* Toshiba F3507g */ USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM, From 662a96bd6f020782dfbdc0d0bd177c7dbb556687 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 19 Feb 2010 05:13:58 +0000 Subject: [PATCH 587/640] tc35815: Remove a wrong netif_wake_queue() call which triggers BUG_ON The netif_wake_queue() is called correctly (i.e. only on !txfull condition) from txdone routine. So Unconditional call to the netif_wake_queue() here is wrong. This might cause calling of start_xmit routine on txfull state and trigger BUG_ON. This bug does not happen when NAPI disabled. After txdone there must be at least one free tx slot. But with NAPI, this is not true anymore and the BUG_ON can hits on heavy load. In this driver NAPI was enabled on 2.6.33-rc1 so this is regression from 2.6.32 kernel. Reported-by: Ralf Roesch Signed-off-by: Atsushi Nemoto Signed-off-by: David S. Miller --- drivers/net/tc35815.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index 75a669d48e5e..d71c1976072e 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -1437,7 +1437,6 @@ static int tc35815_do_interrupt(struct net_device *dev, u32 status, int limit) /* Transmit complete. 
*/ lp->lstats.tx_ints++; tc35815_txdone(dev); - netif_wake_queue(dev); if (ret < 0) ret = 0; } From f526d68b6ce9ba7a2bd94e663e240a022524c58a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:52 -0600 Subject: [PATCH 588/640] perf/scripts: Fix supported language listing option 'perf trace -s list' prints a list of the supported scripting languages. One problem with it is that it falls through and prints the trace as well. The use of 'list' for this also makes it easy to confuse with 'perf trace -l', used for listing available scripts. So change 'perf trace -s list' to 'perf trace -s lang' and fixes the fall-through problem. Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/Documentation/perf-trace.txt | 4 +++- tools/perf/builtin-trace.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 60e5900da483..c00a76fcb8d6 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -45,9 +45,11 @@ OPTIONS --list=:: Display a list of available trace scripts. --s:: +-s ['lang']:: --script=:: Process trace data with the given script ([lang]:script[.ext]). + If the string 'lang' is specified in place of a script name, a + list of supported languages will be displayed instead. 
-g:: --gen-script=:: diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b65779e3c10..d5d20c34e221 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -219,9 +219,9 @@ static int parse_scriptname(const struct option *opt __used, const char *script, *ext; int len; - if (strcmp(str, "list") == 0) { + if (strcmp(str, "lang") == 0) { list_available_languages(); - return 0; + exit(0); } script = strchr(str, ':'); From e26207a3819684e9b4450a2d30bdd065fa92d9c7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:53 -0600 Subject: [PATCH 589/640] perf/scripts: Fix bug in Util.pm Fix bogus calculation. Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-3-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm index 052f132ced24..f869c48dc9b0 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm @@ -44,7 +44,7 @@ sub nsecs_secs { sub nsecs_nsecs { my ($nsecs) = @_; - return $nsecs - nsecs_secs($nsecs); + return $nsecs % $NSECS_PER_SEC; } sub nsecs_str { From 7397d80ddde8eef3b1dce6c29e0c53bd322ef824 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:54 -0600 Subject: [PATCH 590/640] perf/scripts: Move common code out of Perl-specific files This stuff is needed by all scripting engines; move it from the Perl engine source to a more common place. 
Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-4-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- .../scripts/perl/Perf-Trace-Util/Context.c | 5 ++-- .../scripts/perl/Perf-Trace-Util/Context.xs | 3 ++- tools/perf/util/trace-event-parse.c | 15 +++++++++++ tools/perf/util/trace-event-perl.c | 27 ------------------- tools/perf/util/trace-event-perl.h | 8 ------ tools/perf/util/trace-event.h | 9 ++++++- 6 files changed, 28 insertions(+), 39 deletions(-) diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index af78d9a52a7d..01a64ad693f2 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -31,13 +31,14 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" -#include "../../../util/trace-event-perl.h" +#include "../../../perf.h" +#include "../../../util/trace-event.h" #ifndef PERL_UNUSED_VAR # define PERL_UNUSED_VAR(var) if (0) var = var #endif -#line 41 "Context.c" +#line 42 "Context.c" XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */ XS(XS_Perf__Trace__Context_common_pc) diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs index fb78006c165e..549cf0467d30 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -22,7 +22,8 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" -#include "../../../util/trace-event-perl.h" +#include "../../../perf.h" +#include "../../../util/trace-event.h" MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context PROTOTYPES: ENABLE diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c4b3cb8a02b1..9b3c20f42f98 100644 --- 
a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -3286,3 +3286,18 @@ void parse_set_info(int nr_cpus, int long_sz) cpus = nr_cpus; long_size = long_sz; } + +int common_pc(struct scripting_context *context) +{ + return parse_common_pc(context->event_data); +} + +int common_flags(struct scripting_context *context) +{ + return parse_common_flags(context->event_data); +} + +int common_lock_depth(struct scripting_context *context) +{ + return parse_common_lock_depth(context->event_data); +} diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c index 6d6d76b8a21e..5b49df067df0 100644 --- a/tools/perf/util/trace-event-perl.c +++ b/tools/perf/util/trace-event-perl.c @@ -239,33 +239,6 @@ static inline struct event *find_cache_event(int type) return event; } -int common_pc(struct scripting_context *context) -{ - int pc; - - pc = parse_common_pc(context->event_data); - - return pc; -} - -int common_flags(struct scripting_context *context) -{ - int flags; - - flags = parse_common_flags(context->event_data); - - return flags; -} - -int common_lock_depth(struct scripting_context *context) -{ - int lock_depth; - - lock_depth = parse_common_lock_depth(context->event_data); - - return lock_depth; -} - static void perl_process_event(int cpu, void *data, int size __unused, unsigned long long nsecs, char *comm) diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h index e88fb26137bb..01efcc9564fb 100644 --- a/tools/perf/util/trace-event-perl.h +++ b/tools/perf/util/trace-event-perl.h @@ -44,12 +44,4 @@ void boot_DynaLoader(pTHX_ CV *cv); typedef PerlInterpreter * INTERP; #endif -struct scripting_context { - void *event_data; -}; - -int common_pc(struct scripting_context *context); -int common_flags(struct scripting_context *context); -int common_lock_depth(struct scripting_context *context); - #endif /* __PERF_TRACE_EVENT_PERL_H */ diff --git a/tools/perf/util/trace-event.h 
b/tools/perf/util/trace-event.h index 6ad405620c9b..aaf2da2d21e5 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -279,7 +279,14 @@ struct scripting_ops { int script_spec_register(const char *spec, struct scripting_ops *ops); -extern struct scripting_ops perl_scripting_ops; void setup_perl_scripting(void); +struct scripting_context { + void *event_data; +}; + +int common_pc(struct scripting_context *context); +int common_flags(struct scripting_context *context); +int common_lock_depth(struct scripting_context *context); + #endif /* __PERF_TRACE_EVENTS_H */ From 82d156cd5e817055c63ec50247a425c195f4cb14 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:55 -0600 Subject: [PATCH 591/640] perf/scripts: Move Perl scripting files to scripting-engines dir Create a scripting-engines directory to contain scripting engine implementation code, in anticipation of the addition of new scripting support. Also removes trace-event-perl.h. Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-5-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/Makefile | 9 +- .../trace-event-perl.c | 88 ++------------- tools/perf/util/trace-event-perl.h | 47 -------- tools/perf/util/trace-event-scripting.c | 106 ++++++++++++++++++ 4 files changed, 122 insertions(+), 128 deletions(-) rename tools/perf/util/{ => scripting-engines}/trace-event-perl.c (87%) delete mode 100644 tools/perf/util/trace-event-perl.h create mode 100644 tools/perf/util/trace-event-scripting.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3a5fb36ccc97..0a3c0c8b3fc0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -385,7 +385,6 @@ LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/trace-event.h -LIB_H += util/trace-event-perl.h LIB_H += util/probe-finder.h LIB_H += 
util/probe-event.h @@ -428,7 +427,7 @@ LIB_OBJS += util/thread.o LIB_OBJS += util/trace-event-parse.o LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o -LIB_OBJS += util/trace-event-perl.o +LIB_OBJS += util/trace-event-scripting.o LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o @@ -519,6 +518,7 @@ ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; e BASIC_CFLAGS += -DNO_LIBPERL else ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) + LIB_OBJS += util/scripting-engines/trace-event-perl.o LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o endif @@ -893,8 +893,8 @@ util/hweight.o: ../../lib/hweight.c PERF-CFLAGS util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< -util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< +util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< @@ -1012,6 +1012,7 @@ install: all $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + ifdef BUILT_INS $(INSTALL) -d -m 755 
'$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c similarity index 87% rename from tools/perf/util/trace-event-perl.c rename to tools/perf/util/scripting-engines/trace-event-perl.c index 5b49df067df0..5376378e0cfc 100644 --- a/tools/perf/util/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -25,10 +25,16 @@ #include #include -#include "../perf.h" -#include "util.h" -#include "trace-event.h" -#include "trace-event-perl.h" +#include "../../perf.h" +#include "../util.h" +#include "../trace-event.h" + +#include +#include + +void boot_Perf__Trace__Context(pTHX_ CV *cv); +void boot_DynaLoader(pTHX_ CV *cv); +typedef PerlInterpreter * INTERP; void xs_init(pTHX); @@ -49,7 +55,7 @@ INTERP my_perl; struct event *events[FTRACE_MAX_EVENT]; -static struct scripting_context *scripting_context; +extern struct scripting_context *scripting_context; static char *cur_field_name; static int zero_flag_atom; @@ -560,75 +566,3 @@ struct scripting_ops perl_scripting_ops = { .process_event = perl_process_event, .generate_script = perl_generate_script, }; - -static void print_unsupported_msg(void) -{ - fprintf(stderr, "Perl scripting not supported." 
- " Install libperl and rebuild perf to enable it.\n" - "For example:\n # apt-get install libperl-dev (ubuntu)" - "\n # yum install perl-ExtUtils-Embed (Fedora)" - "\n etc.\n"); -} - -static int perl_start_script_unsupported(const char *script __unused, - int argc __unused, - const char **argv __unused) -{ - print_unsupported_msg(); - - return -1; -} - -static int perl_stop_script_unsupported(void) -{ - return 0; -} - -static void perl_process_event_unsupported(int cpu __unused, - void *data __unused, - int size __unused, - unsigned long long nsecs __unused, - char *comm __unused) -{ -} - -static int perl_generate_script_unsupported(const char *outfile __unused) -{ - print_unsupported_msg(); - - return -1; -} - -struct scripting_ops perl_scripting_unsupported_ops = { - .name = "Perl", - .start_script = perl_start_script_unsupported, - .stop_script = perl_stop_script_unsupported, - .process_event = perl_process_event_unsupported, - .generate_script = perl_generate_script_unsupported, -}; - -static void register_perl_scripting(struct scripting_ops *scripting_ops) -{ - int err; - err = script_spec_register("Perl", scripting_ops); - if (err) - die("error registering Perl script extension"); - - err = script_spec_register("pl", scripting_ops); - if (err) - die("error registering pl script extension"); - - scripting_context = malloc(sizeof(struct scripting_context)); -} - -#ifdef NO_LIBPERL -void setup_perl_scripting(void) -{ - register_perl_scripting(&perl_scripting_unsupported_ops); -} -#else -void setup_perl_scripting(void) -{ - register_perl_scripting(&perl_scripting_ops); -} -#endif diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h deleted file mode 100644 index 01efcc9564fb..000000000000 --- a/tools/perf/util/trace-event-perl.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef __PERF_TRACE_EVENT_PERL_H -#define __PERF_TRACE_EVENT_PERL_H -#ifdef NO_LIBPERL -typedef int INTERP; -#define dSP -#define ENTER -#define SAVETMPS -#define PUTBACK 
-#define SPAGAIN -#define FREETMPS -#define LEAVE -#define SP -#define ERRSV -#define G_SCALAR (0) -#define G_DISCARD (0) -#define G_NOARGS (0) -#define PUSHMARK(a) -#define SvTRUE(a) (0) -#define XPUSHs(s) -#define sv_2mortal(a) -#define newSVpv(a,b) -#define newSVuv(a) -#define newSViv(a) -#define get_cv(a,b) (0) -#define call_pv(a,b) (0) -#define perl_alloc() (0) -#define perl_construct(a) (0) -#define perl_parse(a,b,c,d,e) (0) -#define perl_run(a) (0) -#define perl_destruct(a) (0) -#define perl_free(a) (0) -#define pTHX void -#define CV void -#define dXSUB_SYS -#define pTHX_ -static inline void newXS(const char *a, void *b, const char *c) {} -static void boot_Perf__Trace__Context(pTHX_ CV *cv) {} -static void boot_DynaLoader(pTHX_ CV *cv) {} -#else -#include -#include -void boot_Perf__Trace__Context(pTHX_ CV *cv); -void boot_DynaLoader(pTHX_ CV *cv); -typedef PerlInterpreter * INTERP; -#endif - -#endif /* __PERF_TRACE_EVENT_PERL_H */ diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c new file mode 100644 index 000000000000..9e371965c034 --- /dev/null +++ b/tools/perf/util/trace-event-scripting.c @@ -0,0 +1,106 @@ +/* + * trace-event-scripting. Scripting engine common and initialization code. + * + * Copyright (C) 2009-2010 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +#include "../perf.h" +#include "util.h" +#include "trace-event.h" + +struct scripting_context *scripting_context; + +static int stop_script_unsupported(void) +{ + return 0; +} + +static void process_event_unsupported(int cpu __unused, + void *data __unused, + int size __unused, + unsigned long long nsecs __unused, + char *comm __unused) +{ +} + +static void print_perl_unsupported_msg(void) +{ + fprintf(stderr, "Perl scripting not supported." + " Install libperl and rebuild perf to enable it.\n" + "For example:\n # apt-get install libperl-dev (ubuntu)" + "\n # yum install 'perl(ExtUtils::Embed)' (Fedora)" + "\n etc.\n"); +} + +static int perl_start_script_unsupported(const char *script __unused, + int argc __unused, + const char **argv __unused) +{ + print_perl_unsupported_msg(); + + return -1; +} + +static int perl_generate_script_unsupported(const char *outfile __unused) +{ + print_perl_unsupported_msg(); + + return -1; +} + +struct scripting_ops perl_scripting_unsupported_ops = { + .name = "Perl", + .start_script = perl_start_script_unsupported, + .stop_script = stop_script_unsupported, + .process_event = process_event_unsupported, + .generate_script = perl_generate_script_unsupported, +}; + +static void register_perl_scripting(struct scripting_ops *scripting_ops) +{ + int err; + err = script_spec_register("Perl", scripting_ops); + if (err) + die("error registering Perl script extension"); + + err = script_spec_register("pl", scripting_ops); + if (err) + die("error registering pl script extension"); + + scripting_context = malloc(sizeof(struct scripting_context)); +} + +#ifdef NO_LIBPERL +void setup_perl_scripting(void) +{ + register_perl_scripting(&perl_scripting_unsupported_ops); +} 
+#else +struct scripting_ops perl_scripting_ops; + +void setup_perl_scripting(void) +{ + register_perl_scripting(&perl_scripting_ops); +} +#endif From 266fe2f217d1dc9f8041e395c0ab4569a5bad91a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:56 -0600 Subject: [PATCH 592/640] perf/scripts: Remove check-perf-trace from listed scripts The check-perf-trace script only checks Perl functionality, and doesn't really need to be listed as a user script anyway. This only removes the '-report' shell script, so although it doesn't appear in the listing, the '-record' shell script and the check perf trace perl script itself are still available and can still be run manually as such: $ libexec/perf-core/scripts/perl/bin/check-perf-trace-record $ perf trace -s libexec/perf-core/scripts/perl/check-perf-trace.pl Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-6-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/scripts/perl/bin/check-perf-trace-record | 5 ----- tools/perf/scripts/perl/bin/check-perf-trace-report | 6 ------ 2 files changed, 11 deletions(-) delete mode 100644 tools/perf/scripts/perl/bin/check-perf-trace-report diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record index c7ec5de2f535..3c1574498942 100644 --- a/tools/perf/scripts/perl/bin/check-perf-trace-record +++ b/tools/perf/scripts/perl/bin/check-perf-trace-record @@ -1,7 +1,2 @@ #!/bin/bash perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry - - - - - diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report deleted file mode 100644 index 7fc4a033dd49..000000000000 --- a/tools/perf/scripts/perl/bin/check-perf-trace-report +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# description: useless but 
exhaustive test script -perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl - - - From f7624c97b8e5bca49be7854309550bff8ce98c47 Mon Sep 17 00:00:00 2001 From: Hedi Berriche Date: Tue, 23 Feb 2010 23:58:49 +0000 Subject: [PATCH 593/640] [IA64] Fix broken sn2 build Revert the change made to arch/ia64/sn/kernel/setup.c by commit 204fba4aa303ea4a7bb726a539bf4a5b9e3203d0 as it breaks the build. Fixing the build the b94b08081fcecf83fa690d6c5664f6316fe72208 way breaks xpc because genksyms then fails to generate a CRC for per_cpu____sn_cnodeid_to_nasid because of limitations in the generic genksyms code. Signed-off-by: Hedi Berriche Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ece1bf994499..e456f062f241 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); From c4d49794ff2838038fd9756eae39c39a5a685833 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Tue, 16 Feb 2010 20:25:43 +0000 Subject: [PATCH 594/640] net: bug fix for vlan + gro issue Traffic (tcp) does not start on a vlan interface when gro is enabled. Even the tcp handshake was not taking place. This is because the eth_type_trans call before the netif_receive_skb in napi_gro_finish() resets the skb->dev to napi->dev from the previously set vlan netdev interface. This causes the ip_route_input to drop the incoming packet considering it as a packet coming from a martian source. I could repro this on 2.6.32.7 (stable) and 2.6.33-rc7. 
With this fix, the traffic starts and the test runs fine on both vlan and non-vlan interfaces. CC: Herbert Xu CC: Patrick McHardy Signed-off-by: Ajit Khaparde Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index be9924f60ec3..ec874218b206 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2761,7 +2761,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, switch (ret) { case GRO_NORMAL: case GRO_HELD: - skb->protocol = eth_type_trans(skb, napi->dev); + skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_HELD) skb_gro_pull(skb, -ETH_HLEN); From 0d670b24729be268eba98b3920b8571f60798d8d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 15 Feb 2010 10:50:42 +0100 Subject: [PATCH 595/640] microblaze: Fix cache loop function for cache range I created wrong asm code, but no test showed that this part of the code was wrong. I am not convinced that it was a good idea to create asm-optimized macros for caches. The reason is that there is no optimization over the previous code, which is why it makes sense to add the old code and do some benchmarking to see which functions are faster. 
Signed-off-by: Michal Simek --- arch/microblaze/kernel/cpu/cache.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index d9d63831cc2f..2a56bccce4e0 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -172,16 +172,15 @@ do { \ /* It is used only first parameter for OP - for wic, wdc */ #define CACHE_RANGE_LOOP_1(start, end, line_length, op) \ do { \ - int step = -line_length; \ - int count = end - start; \ - BUG_ON(count <= 0); \ + int volatile temp; \ + BUG_ON(end - start <= 0); \ \ - __asm__ __volatile__ (" 1: addk %0, %0, %1; \ - " #op " %0, r0; \ - bgtid %1, 1b; \ - addk %1, %1, %2; \ - " : : "r" (start), "r" (count), \ - "r" (step) : "memory"); \ + __asm__ __volatile__ (" 1: " #op " %1, r0; \ + cmpu %0, %1, %2; \ + bgtid %0, 1b; \ + addk %1, %1, %3; \ + " : : "r" (temp), "r" (start), "r" (end),\ + "r" (line_length) : "memory"); \ } while (0); static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end) @@ -313,16 +312,6 @@ static void __invalidate_dcache_all_wb(void) pr_debug("%s\n", __func__); CACHE_ALL_LOOP2(cpuinfo.dcache_size, cpuinfo.dcache_line_length, wdc.clear) - -#if 0 - unsigned int i; - - pr_debug("%s\n", __func__); - - /* Just loop through cache size and invalidate it */ - for (i = 0; i < cpuinfo.dcache_size; i += cpuinfo.dcache_line_length) - __invalidate_dcache(0, i); -#endif } static void __invalidate_dcache_range_wb(unsigned long start, From 83b4d17d8841a9a7b8ed02ac99ca92afada154e1 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Mon, 22 Feb 2010 09:25:42 -0600 Subject: [PATCH 596/640] microblaze: Fix out_le32() macro Trailing semicolon causes compilation involving out_le32() to fail. Signed-off-by: Steven J. 
Magnani Signed-off-by: Michal Simek --- arch/microblaze/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index fc9997b73c09..267c7c779e53 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -217,7 +217,7 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size, * Little endian */ -#define out_le32(a, v) __raw_writel(__cpu_to_le32(v), (a)); +#define out_le32(a, v) __raw_writel(__cpu_to_le32(v), (a)) #define out_le16(a, v) __raw_writew(__cpu_to_le16(v), (a)) #define in_le32(a) __le32_to_cpu(__raw_readl(a)) From 5fd4514bb351b5ecb0da3692fff70741e5ed200c Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Mon, 22 Feb 2010 23:25:59 +0000 Subject: [PATCH 597/640] parisc: Set PCI CLS early in boot. Set the PCI CLS early in the boot process to prevent device failures. In pcibios_set_master use the new pci_cache_line_size instead of a hard-coded value. Signed-off-by: Carlos O'Donell Reviewed-by: Grant Grundler Signed-off-by: Kyle McMartin --- arch/parisc/kernel/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c index f7064abc3bb6..9e74bfe071dc 100644 --- a/arch/parisc/kernel/pci.c +++ b/arch/parisc/kernel/pci.c @@ -18,7 +18,6 @@ #include #include -#include /* for L1_CACHE_BYTES */ #include #define DEBUG_RESOURCES 0 @@ -123,6 +122,10 @@ static int __init pcibios_init(void) } else { printk(KERN_WARNING "pci_bios != NULL but init() is!\n"); } + + /* Set the CLS for PCI as early as possible. */ + pci_cache_line_size = pci_dfl_cache_line_size; + return 0; } @@ -171,7 +174,7 @@ void pcibios_set_master(struct pci_dev *dev) ** upper byte is PCI_LATENCY_TIMER. 
*/ pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, - (0x80 << 8) | (L1_CACHE_BYTES / sizeof(u32))); + (0x80 << 8) | pci_cache_line_size); } From 60b341b778cc2929df16c0a504c91621b3c6a4ad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Feb 2010 10:52:17 -0800 Subject: [PATCH 598/640] Linux 2.6.33 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 12b1aa1103ee..1b24895212d8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 33 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* From 7e4b21b84c43bb8a80b916e40718ca4ed1fc52e6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:57 -0600 Subject: [PATCH 599/640] perf/scripts: Add Python scripting engine Add base support for Python scripting to perf trace. Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-6-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/Makefile | 21 + tools/perf/builtin-trace.c | 1 + .../scripts/python/Perf-Trace-Util/Context.c | 88 +++ .../Perf-Trace-Util/lib/Perf/Trace/Core.py | 91 +++ .../Perf-Trace-Util/lib/Perf/Trace/Util.py | 25 + .../scripting-engines/trace-event-python.c | 576 ++++++++++++++++++ tools/perf/util/trace-event-scripting.c | 61 ++ tools/perf/util/trace-event.h | 1 + 8 files changed, 864 insertions(+) create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/Context.c create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py create mode 100644 tools/perf/util/scripting-engines/trace-event-python.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0a3c0c8b3fc0..14273164db04 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ 
-522,6 +522,19 @@ else LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o endif +ifndef NO_LIBPYTHON +PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` +PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` +endif + +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o /dev/null $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) + BASIC_CFLAGS += -DNO_LIBPYTHON +else + ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) + LIB_OBJS += util/scripting-engines/trace-event-python.o + LIB_OBJS += scripts/python/Perf-Trace-Util/Context.o +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else @@ -899,6 +912,12 @@ util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-pe scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< +util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-python.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< + +scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o scripts/python/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -1012,6 +1031,8 @@ install: all $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/perl/*.pl -t 
'$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' ifdef BUILT_INS $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d5d20c34e221..5db687fc13de 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -44,6 +44,7 @@ static void setup_scripting(void) perf_set_argv_exec_path(perf_exec_path()); setup_perl_scripting(); + setup_python_scripting(); scripting_ops = &default_scripting_ops; } diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c new file mode 100644 index 000000000000..957085dd5d8d --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -0,0 +1,88 @@ +/* + * Context.c. Python interfaces for perf trace. + * + * Copyright (C) 2010 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include "../../../perf.h" +#include "../../../util/trace-event.h" + +PyMODINIT_FUNC initperf_trace_context(void); + +static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args) +{ + static struct scripting_context *scripting_context; + PyObject *context; + int retval; + + if (!PyArg_ParseTuple(args, "O", &context)) + return NULL; + + scripting_context = PyCObject_AsVoidPtr(context); + retval = common_pc(scripting_context); + + return Py_BuildValue("i", retval); +} + +static PyObject *perf_trace_context_common_flags(PyObject *self, + PyObject *args) +{ + static struct scripting_context *scripting_context; + PyObject *context; + int retval; + + if (!PyArg_ParseTuple(args, "O", &context)) + return NULL; + + scripting_context = PyCObject_AsVoidPtr(context); + retval = common_flags(scripting_context); + + return Py_BuildValue("i", retval); +} + +static PyObject *perf_trace_context_common_lock_depth(PyObject *self, + PyObject *args) +{ + static struct scripting_context *scripting_context; + PyObject *context; + int retval; + + if (!PyArg_ParseTuple(args, "O", &context)) + return NULL; + + scripting_context = PyCObject_AsVoidPtr(context); + retval = common_lock_depth(scripting_context); + + return Py_BuildValue("i", retval); +} + +static PyMethodDef ContextMethods[] = { + { "common_pc", perf_trace_context_common_pc, METH_VARARGS, + "Get the common preempt count event field value."}, + { "common_flags", perf_trace_context_common_flags, METH_VARARGS, + "Get the common flags event field value."}, + { "common_lock_depth", perf_trace_context_common_lock_depth, + METH_VARARGS, "Get the common lock depth event field value."}, + { NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC initperf_trace_context(void) +{ + (void) 
Py_InitModule("perf_trace_context", ContextMethods); +} diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py new file mode 100644 index 000000000000..1dc464ee2ca8 --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py @@ -0,0 +1,91 @@ +# Core.py - Python extension for perf trace, core functions +# +# Copyright (C) 2010 by Tom Zanussi +# +# This software may be distributed under the terms of the GNU General +# Public License ("GPL") version 2 as published by the Free Software +# Foundation. + +from collections import defaultdict + +def autodict(): + return defaultdict(autodict) + +flag_fields = autodict() +symbolic_fields = autodict() + +def define_flag_field(event_name, field_name, delim): + flag_fields[event_name][field_name]['delim'] = delim + +def define_flag_value(event_name, field_name, value, field_str): + flag_fields[event_name][field_name]['values'][value] = field_str + +def define_symbolic_field(event_name, field_name): + # nothing to do, really + pass + +def define_symbolic_value(event_name, field_name, value, field_str): + symbolic_fields[event_name][field_name]['values'][value] = field_str + +def flag_str(event_name, field_name, value): + string = "" + + if flag_fields[event_name][field_name]: + print_delim = 0 + keys = flag_fields[event_name][field_name]['values'].keys() + keys.sort() + for idx in keys: + if not value and not idx: + string += flag_fields[event_name][field_name]['values'][idx] + break + if idx and (value & idx) == idx: + if print_delim and flag_fields[event_name][field_name]['delim']: + string += " " + flag_fields[event_name][field_name]['delim'] + " " + string += flag_fields[event_name][field_name]['values'][idx] + print_delim = 1 + value &= ~idx + + return string + +def symbol_str(event_name, field_name, value): + string = "" + + if symbolic_fields[event_name][field_name]: + keys = 
symbolic_fields[event_name][field_name]['values'].keys() + keys.sort() + for idx in keys: + if not value and not idx: + string = symbolic_fields[event_name][field_name]['values'][idx] + break + if (value == idx): + string = symbolic_fields[event_name][field_name]['values'][idx] + break + + return string + +trace_flags = { 0x00: "NONE", \ + 0x01: "IRQS_OFF", \ + 0x02: "IRQS_NOSUPPORT", \ + 0x04: "NEED_RESCHED", \ + 0x08: "HARDIRQ", \ + 0x10: "SOFTIRQ" } + +def trace_flag_str(value): + string = "" + print_delim = 0 + + keys = trace_flags.keys() + + for idx in keys: + if not value and not idx: + string += "NONE" + break + + if idx and (value & idx) == idx: + if print_delim: + string += " | "; + string += trace_flags[idx] + print_delim = 1 + value &= ~idx + + return string diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py new file mode 100644 index 000000000000..83e91435ed09 --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -0,0 +1,25 @@ +# Util.py - Python extension for perf trace, miscellaneous utility code +# +# Copyright (C) 2010 by Tom Zanussi +# +# This software may be distributed under the terms of the GNU General +# Public License ("GPL") version 2 as published by the Free Software +# Foundation. 
+ +NSECS_PER_SEC = 1000000000 + +def avg(total, n): + return total / n + +def nsecs(secs, nsecs): + return secs * NSECS_PER_SEC + nsecs + +def nsecs_secs(nsecs): + return nsecs / NSECS_PER_SEC + +def nsecs_nsecs(nsecs): + return nsecs % NSECS_PER_SEC + +def nsecs_str(nsecs): + str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)), + return str diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c new file mode 100644 index 000000000000..d402f64f9b46 --- /dev/null +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -0,0 +1,576 @@ +/* + * trace-event-python. Feed trace events to an embedded Python interpreter. + * + * Copyright (C) 2010 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +#include +#include +#include +#include +#include + +#include "../../perf.h" +#include "../util.h" +#include "../trace-event.h" + +PyMODINIT_FUNC initperf_trace_context(void); + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(unsigned short) * 8)) - 1) + +struct event *events[FTRACE_MAX_EVENT]; + +#define MAX_FIELDS 64 +#define N_COMMON_FIELDS 7 + +extern struct scripting_context *scripting_context; + +static char *cur_field_name; +static int zero_flag_atom; + +static PyObject *main_module, *main_dict; + +static void handler_call_die(const char *handler_name) +{ + PyErr_Print(); + Py_FatalError("problem in Python trace event handler"); +} + +static void define_value(enum print_arg_type field_type, + const char *ev_name, + const char *field_name, + const char *field_value, + const char *field_str) +{ + const char *handler_name = "define_flag_value"; + PyObject *handler, *t, *retval; + unsigned long long value; + unsigned n = 0; + + if (field_type == PRINT_SYMBOL) + handler_name = "define_symbolic_value"; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + value = eval_flag(field_value); + + PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); + PyTuple_SetItem(t, n++, PyString_FromString(field_name)); + PyTuple_SetItem(t, n++, PyInt_FromLong(value)); + PyTuple_SetItem(t, n++, PyString_FromString(field_str)); + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (handler && PyCallable_Check(handler)) { + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); + } + + Py_DECREF(t); +} + +static void define_values(enum print_arg_type field_type, + struct 
print_flag_sym *field, + const char *ev_name, + const char *field_name) +{ + define_value(field_type, ev_name, field_name, field->value, + field->str); + + if (field->next) + define_values(field_type, field->next, ev_name, field_name); +} + +static void define_field(enum print_arg_type field_type, + const char *ev_name, + const char *field_name, + const char *delim) +{ + const char *handler_name = "define_flag_field"; + PyObject *handler, *t, *retval; + unsigned n = 0; + + if (field_type == PRINT_SYMBOL) + handler_name = "define_symbolic_field"; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); + PyTuple_SetItem(t, n++, PyString_FromString(field_name)); + if (field_type == PRINT_FLAGS) + PyTuple_SetItem(t, n++, PyString_FromString(delim)); + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (handler && PyCallable_Check(handler)) { + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); + } + + Py_DECREF(t); +} + +static void define_event_symbols(struct event *event, + const char *ev_name, + struct print_arg *args) +{ + switch (args->type) { + case PRINT_NULL: + break; + case PRINT_ATOM: + define_value(PRINT_FLAGS, ev_name, cur_field_name, "0", + args->atom.atom); + zero_flag_atom = 0; + break; + case PRINT_FIELD: + if (cur_field_name) + free(cur_field_name); + cur_field_name = strdup(args->field.name); + break; + case PRINT_FLAGS: + define_event_symbols(event, ev_name, args->flags.field); + define_field(PRINT_FLAGS, ev_name, cur_field_name, + args->flags.delim); + define_values(PRINT_FLAGS, args->flags.flags, ev_name, + cur_field_name); + break; + case PRINT_SYMBOL: + define_event_symbols(event, ev_name, args->symbol.field); + define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL); + define_values(PRINT_SYMBOL, 
args->symbol.symbols, ev_name, + cur_field_name); + break; + case PRINT_STRING: + break; + case PRINT_TYPE: + define_event_symbols(event, ev_name, args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + zero_flag_atom = 1; + define_event_symbols(event, ev_name, args->op.left); + define_event_symbols(event, ev_name, args->op.right); + break; + default: + /* we should warn... */ + return; + } + + if (args->next) + define_event_symbols(event, ev_name, args->next); +} + +static inline struct event *find_cache_event(int type) +{ + static char ev_name[256]; + struct event *event; + + if (events[type]) + return events[type]; + + events[type] = event = trace_find_event(type); + if (!event) + return NULL; + + sprintf(ev_name, "%s__%s", event->system, event->name); + + define_event_symbols(event, ev_name, event->print_fmt.args); + + return event; +} + +static void python_process_event(int cpu, void *data, + int size __unused, + unsigned long long nsecs, char *comm) +{ + PyObject *handler, *retval, *context, *t; + static char handler_name[256]; + struct format_field *field; + unsigned long long val; + unsigned long s, ns; + struct event *event; + unsigned n = 0; + int type; + int pid; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + type = trace_parse_common_type(data); + + event = find_cache_event(type); + if (!event) + die("ug! 
no event found for type %d", type); + + pid = trace_parse_common_pid(data); + + sprintf(handler_name, "%s__%s", event->system, event->name); + + s = nsecs / NSECS_PER_SEC; + ns = nsecs - s * NSECS_PER_SEC; + + scripting_context->event_data = data; + + context = PyCObject_FromVoidPtr(scripting_context, NULL); + + PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); + PyTuple_SetItem(t, n++, + PyCObject_FromVoidPtr(scripting_context, NULL)); + PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); + PyTuple_SetItem(t, n++, PyInt_FromLong(s)); + PyTuple_SetItem(t, n++, PyInt_FromLong(ns)); + PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); + PyTuple_SetItem(t, n++, PyString_FromString(comm)); + + for (field = event->format.fields; field; field = field->next) { + if (field->flags & FIELD_IS_STRING) { + int offset; + if (field->flags & FIELD_IS_DYNAMIC) { + offset = *(int *)(data + field->offset); + offset &= 0xffff; + } else + offset = field->offset; + PyTuple_SetItem(t, n++, + PyString_FromString((char *)data + offset)); + } else { /* FIELD_IS_NUMERIC */ + val = read_size(data + field->offset, field->size); + if (field->flags & FIELD_IS_SIGNED) { + PyTuple_SetItem(t, n++, PyInt_FromLong(val)); + } else { + PyTuple_SetItem(t, n++, PyInt_FromLong(val)); + } + } + } + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (handler && PyCallable_Check(handler)) { + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); + } else { + handler = PyDict_GetItemString(main_dict, "trace_unhandled"); + if (handler && PyCallable_Check(handler)) { + if (_PyTuple_Resize(&t, N_COMMON_FIELDS) == -1) + Py_FatalError("error resizing Python tuple"); + + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die("trace_unhandled"); + } + } + + Py_DECREF(t); +} + +static int run_start_sub(void) +{ + PyObject *handler, *retval; + int 
err = 0; + + main_module = PyImport_AddModule("__main__"); + if (main_module == NULL) + return -1; + Py_INCREF(main_module); + + main_dict = PyModule_GetDict(main_module); + if (main_dict == NULL) { + err = -1; + goto error; + } + Py_INCREF(main_dict); + + handler = PyDict_GetItemString(main_dict, "trace_begin"); + if (handler == NULL || !PyCallable_Check(handler)) + goto out; + + retval = PyObject_CallObject(handler, NULL); + if (retval == NULL) + handler_call_die("trace_begin"); + + Py_DECREF(retval); + return err; +error: + Py_XDECREF(main_dict); + Py_XDECREF(main_module); +out: + return err; +} + +/* + * Start trace script + */ +static int python_start_script(const char *script, int argc, const char **argv) +{ + const char **command_line; + char buf[PATH_MAX]; + int i, err = 0; + FILE *fp; + + command_line = malloc((argc + 1) * sizeof(const char *)); + command_line[0] = script; + for (i = 1; i < argc + 1; i++) + command_line[i] = argv[i - 1]; + + Py_Initialize(); + + initperf_trace_context(); + + PySys_SetArgv(argc + 1, (char **)command_line); + + fp = fopen(script, "r"); + if (!fp) { + sprintf(buf, "Can't open python script \"%s\"", script); + perror(buf); + err = -1; + goto error; + } + + err = PyRun_SimpleFile(fp, script); + if (err) { + fprintf(stderr, "Error running python script %s\n", script); + goto error; + } + + err = run_start_sub(); + if (err) { + fprintf(stderr, "Error starting python script %s\n", script); + goto error; + } + + free(command_line); + fprintf(stderr, "perf trace started with Python script %s\n\n", + script); + + return err; +error: + Py_Finalize(); + free(command_line); + + return err; +} + +/* + * Stop trace script + */ +static int python_stop_script(void) +{ + PyObject *handler, *retval; + int err = 0; + + handler = PyDict_GetItemString(main_dict, "trace_end"); + if (handler == NULL || !PyCallable_Check(handler)) + goto out; + + retval = PyObject_CallObject(handler, NULL); + if (retval == NULL) + handler_call_die("trace_end"); + 
else + Py_DECREF(retval); +out: + Py_XDECREF(main_dict); + Py_XDECREF(main_module); + Py_Finalize(); + + fprintf(stderr, "\nperf trace Python script stopped\n"); + + return err; +} + +static int python_generate_script(const char *outfile) +{ + struct event *event = NULL; + struct format_field *f; + char fname[PATH_MAX]; + int not_first, count; + FILE *ofp; + + sprintf(fname, "%s.py", outfile); + ofp = fopen(fname, "w"); + if (ofp == NULL) { + fprintf(stderr, "couldn't open %s\n", fname); + return -1; + } + fprintf(ofp, "# perf trace event handlers, " + "generated by perf trace -g python\n"); + + fprintf(ofp, "# Licensed under the terms of the GNU GPL" + " License version 2\n\n"); + + fprintf(ofp, "# The common_* event handler fields are the most useful " + "fields common to\n"); + + fprintf(ofp, "# all events. They don't necessarily correspond to " + "the 'common_*' fields\n"); + + fprintf(ofp, "# in the format files. Those fields not available as " + "handler params can\n"); + + fprintf(ofp, "# be retrieved using Python functions of the form " + "common_*(context).\n"); + + fprintf(ofp, "# See the perf-trace-python Documentation for the list " + "of available functions.\n\n"); + + fprintf(ofp, "import os\n"); + fprintf(ofp, "import sys\n\n"); + + fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n"); + fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n"); + fprintf(ofp, "\nfrom perf_trace_context import *\n"); + fprintf(ofp, "from Core import *\n\n\n"); + + fprintf(ofp, "def trace_begin():\n"); + fprintf(ofp, "\tprint \"in trace_begin\"\n\n"); + + fprintf(ofp, "def trace_end():\n"); + fprintf(ofp, "\tprint \"in trace_end\"\n\n"); + + while ((event = trace_find_next_event(event))) { + fprintf(ofp, "def %s__%s(", event->system, event->name); + fprintf(ofp, "event_name, "); + fprintf(ofp, "context, "); + fprintf(ofp, "common_cpu,\n"); + fprintf(ofp, "\tcommon_secs, "); + fprintf(ofp, "common_nsecs, "); + fprintf(ofp, "common_pid, "); + 
fprintf(ofp, "common_comm,\n\t"); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (++count % 5 == 0) + fprintf(ofp, "\n\t"); + + fprintf(ofp, "%s", f->name); + } + fprintf(ofp, "):\n"); + + fprintf(ofp, "\t\tprint_header(event_name, common_cpu, " + "common_secs, common_nsecs,\n\t\t\t" + "common_pid, common_comm)\n\n"); + + fprintf(ofp, "\t\tprint \""); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (count && count % 3 == 0) { + fprintf(ofp, "\" \\\n\t\t\""); + } + count++; + + fprintf(ofp, "%s=", f->name); + if (f->flags & FIELD_IS_STRING || + f->flags & FIELD_IS_FLAG || + f->flags & FIELD_IS_SYMBOLIC) + fprintf(ofp, "%%s"); + else if (f->flags & FIELD_IS_SIGNED) + fprintf(ofp, "%%d"); + else + fprintf(ofp, "%%u"); + } + + fprintf(ofp, "\\n\" %% \\\n\t\t("); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + + if (++count % 5 == 0) + fprintf(ofp, "\n\t\t"); + + if (f->flags & FIELD_IS_FLAG) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t\t"); + count = 4; + } + fprintf(ofp, "flag_str(\""); + fprintf(ofp, "%s__%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", %s)", f->name, + f->name); + } else if (f->flags & FIELD_IS_SYMBOLIC) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t\t"); + count = 4; + } + fprintf(ofp, "symbol_str(\""); + fprintf(ofp, "%s__%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", %s)", f->name, + f->name); + } else + fprintf(ofp, "%s", f->name); + } + + fprintf(ofp, "),\n\n"); + } + + fprintf(ofp, "def trace_unhandled(event_name, context, " + "common_cpu, common_secs, common_nsecs,\n\t\t" + "common_pid, common_comm):\n"); + + fprintf(ofp, "\t\tprint_header(event_name, common_cpu, " + "common_secs, common_nsecs,\n\t\tcommon_pid, " + "common_comm)\n\n"); + + fprintf(ofp, 
"def print_header(" + "event_name, cpu, secs, nsecs, pid, comm):\n" + "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" + "(event_name, cpu, secs, nsecs, pid, comm),\n"); + + fclose(ofp); + + fprintf(stderr, "generated Python script: %s\n", fname); + + return 0; +} + +struct scripting_ops python_scripting_ops = { + .name = "Python", + .start_script = python_start_script, + .stop_script = python_stop_script, + .process_event = python_process_event, + .generate_script = python_generate_script, +}; diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 9e371965c034..7ea983acfaea 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -44,6 +44,67 @@ static void process_event_unsupported(int cpu __unused, { } +static void print_python_unsupported_msg(void) +{ + fprintf(stderr, "Python scripting not supported." + " Install libpython and rebuild perf to enable it.\n" + "For example:\n # apt-get install python-dev (ubuntu)" + "\n # yum install python-devel (Fedora)" + "\n etc.\n"); +} + +static int python_start_script_unsupported(const char *script __unused, + int argc __unused, + const char **argv __unused) +{ + print_python_unsupported_msg(); + + return -1; +} + +static int python_generate_script_unsupported(const char *outfile __unused) +{ + print_python_unsupported_msg(); + + return -1; +} + +struct scripting_ops python_scripting_unsupported_ops = { + .name = "Python", + .start_script = python_start_script_unsupported, + .stop_script = stop_script_unsupported, + .process_event = process_event_unsupported, + .generate_script = python_generate_script_unsupported, +}; + +static void register_python_scripting(struct scripting_ops *scripting_ops) +{ + int err; + err = script_spec_register("Python", scripting_ops); + if (err) + die("error registering Python script extension"); + + err = script_spec_register("py", scripting_ops); + if (err) + die("error registering py 
script extension"); + + scripting_context = malloc(sizeof(struct scripting_context)); +} + +#ifdef NO_LIBPYTHON +void setup_python_scripting(void) +{ + register_python_scripting(&python_scripting_unsupported_ops); +} +#else +struct scripting_ops python_scripting_ops; + +void setup_python_scripting(void) +{ + register_python_scripting(&python_scripting_ops); +} +#endif + static void print_perl_unsupported_msg(void) { fprintf(stderr, "Perl scripting not supported." diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index aaf2da2d21e5..c3269b937db4 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -280,6 +280,7 @@ struct scripting_ops { int script_spec_register(const char *spec, struct scripting_ops *ops); void setup_perl_scripting(void); +void setup_python_scripting(void); struct scripting_context { void *event_data; From 4d161f0360d00d46a89827b3fd6da395f00c5d90 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:27:58 -0600 Subject: [PATCH 600/640] perf/scripts: Add syscall tracing scripts Adds a set of scripts that aggregate system call totals and system call errors. Most are Python scripts that also test basic functionality of the new Python engine, but there's also one Perl script added for comparison and for reference in some new Documentation contained in a later patch. 
Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-8-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/Makefile | 3 + .../scripts/perl/bin/check-perf-trace-record | 2 +- .../scripts/perl/bin/failed-syscalls-record | 2 + .../scripts/perl/bin/failed-syscalls-report | 4 + tools/perf/scripts/perl/failed-syscalls.pl | 38 +++++++++ .../python/bin/failed-syscalls-by-pid-record | 2 + .../python/bin/failed-syscalls-by-pid-report | 4 + .../python/bin/syscall-counts-by-pid-record | 2 + .../python/bin/syscall-counts-by-pid-report | 4 + .../scripts/python/bin/syscall-counts-record | 2 + .../scripts/python/bin/syscall-counts-report | 4 + tools/perf/scripts/python/check-perf-trace.py | 83 +++++++++++++++++++ .../scripts/python/failed-syscalls-by-pid.py | 68 +++++++++++++++ .../scripts/python/syscall-counts-by-pid.py | 64 ++++++++++++++ tools/perf/scripts/python/syscall-counts.py | 58 +++++++++++++ 15 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 tools/perf/scripts/perl/bin/failed-syscalls-record create mode 100644 tools/perf/scripts/perl/bin/failed-syscalls-report create mode 100644 tools/perf/scripts/perl/failed-syscalls.pl create mode 100644 tools/perf/scripts/python/bin/failed-syscalls-by-pid-record create mode 100644 tools/perf/scripts/python/bin/failed-syscalls-by-pid-report create mode 100644 tools/perf/scripts/python/bin/syscall-counts-by-pid-record create mode 100644 tools/perf/scripts/python/bin/syscall-counts-by-pid-report create mode 100644 tools/perf/scripts/python/bin/syscall-counts-record create mode 100644 tools/perf/scripts/python/bin/syscall-counts-report create mode 100644 tools/perf/scripts/python/check-perf-trace.py create mode 100644 tools/perf/scripts/python/failed-syscalls-by-pid.py create mode 100644 tools/perf/scripts/python/syscall-counts-by-pid.py create mode 100644 
tools/perf/scripts/python/syscall-counts.py diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 14273164db04..54a5b50ff312 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1032,7 +1032,10 @@ install: all $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' + $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' ifdef BUILT_INS $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record index 3c1574498942..e6cb1474f8e8 100644 --- a/tools/perf/scripts/perl/bin/check-perf-trace-record +++ b/tools/perf/scripts/perl/bin/check-perf-trace-record @@ -1,2 +1,2 @@ #!/bin/bash -perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry +perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record new file mode 100644 index 000000000000..f8885d389e6f --- /dev/null +++ b/tools/perf/scripts/perl/bin/failed-syscalls-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report new file mode 100644 index 000000000000..8bfc660e5056 --- /dev/null +++ 
b/tools/perf/scripts/perl/bin/failed-syscalls-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: system-wide failed syscalls +# args: [comm] +perf trace -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $1 diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl new file mode 100644 index 000000000000..c18e7e27a84b --- /dev/null +++ b/tools/perf/scripts/perl/failed-syscalls.pl @@ -0,0 +1,38 @@ +# failed system call counts +# (c) 2010, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 +# +# Displays system-wide failed system call totals +# If a [comm] arg is specified, only syscalls called by [comm] are displayed. + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Context; +use Perf::Trace::Util; + +my %failed_syscalls; + +sub raw_syscalls::sys_exit +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $id, $ret) = @_; + + if ($ret < 0) { + $failed_syscalls{$common_comm}++; + } +} + +sub trace_end +{ + printf("\nfailed syscalls by comm:\n\n"); + + printf("%-20s %10s\n", "comm", "# errors"); + printf("%-20s %6s %10s\n", "--------------------", "----------"); + + foreach my $comm (sort {$failed_syscalls{$b} <=> $failed_syscalls{$a}} + keys %failed_syscalls) { + printf("%-20s %10s\n", $comm, $failed_syscalls{$comm}); + } +} diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record new file mode 100644 index 000000000000..f8885d389e6f --- /dev/null +++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report new file mode 100644 index 
000000000000..1e0c0a860c87 --- /dev/null +++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: system-wide failed syscalls, by pid +# args: [comm] +perf trace -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $1 diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record new file mode 100644 index 000000000000..45a8c50359da --- /dev/null +++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report new file mode 100644 index 000000000000..f8044d192271 --- /dev/null +++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: system-wide syscall counts, by pid +# args: [comm] +perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $1 diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record new file mode 100644 index 000000000000..45a8c50359da --- /dev/null +++ b/tools/perf/scripts/python/bin/syscall-counts-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report new file mode 100644 index 000000000000..a366aa61612f --- /dev/null +++ b/tools/perf/scripts/python/bin/syscall-counts-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: system-wide syscall counts +# args: [comm] +perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py $1 diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py new file mode 100644 index 000000000000..964d934395ff --- /dev/null +++ 
b/tools/perf/scripts/python/check-perf-trace.py @@ -0,0 +1,83 @@ +# perf trace event handlers, generated by perf trace -g python +# (c) 2010, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 +# +# This script tests basic functionality such as flag and symbol +# strings, common_xxx() calls back into perf, begin, end, unhandled +# events, etc. Basically, if this script runs successfully and +# displays expected results, Python scripting support should be ok. + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from Core import * +from perf_trace_context import * + +unhandled = autodict() + +def trace_begin(): + print "trace_begin" + pass + +def trace_end(): + print_unhandled() + +def irq__softirq_entry(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + vec): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + + print_uncommon(context) + + print "vec=%s\n" % \ + (symbol_str("irq__softirq_entry", "vec", vec)), + +def kmem__kmalloc(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + call_site, ptr, bytes_req, bytes_alloc, + gfp_flags): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + + print_uncommon(context) + + print "call_site=%u, ptr=%u, bytes_req=%u, " \ + "bytes_alloc=%u, gfp_flags=%s\n" % \ + (call_site, ptr, bytes_req, bytes_alloc, + + flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)), + +def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, + common_pid, common_comm): + try: + unhandled[event_name] += 1 + except TypeError: + unhandled[event_name] = 1 + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), + +# print trace fields not included in handler args +def 
print_uncommon(context): + print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \ + % (common_pc(context), trace_flag_str(common_flags(context)), \ + common_lock_depth(context)) + +def print_unhandled(): + keys = unhandled.keys() + if not keys: + return + + print "\nunhandled events:\n\n", + + print "%-40s %10s\n" % ("event", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "-----------"), + + for event_name in keys: + print "%-40s %10d\n" % (event_name, unhandled[event_name]) diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py new file mode 100644 index 000000000000..0ca02278fe69 --- /dev/null +++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py @@ -0,0 +1,68 @@ +# failed system call counts, by pid +# (c) 2010, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 +# +# Displays system-wide failed system call totals, broken down by pid. +# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 
+ +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; + +for_comm = None + +if len(sys.argv) > 2: + sys.exit(usage) + +if len(sys.argv) > 1: + for_comm = sys.argv[1] + +syscalls = autodict() + +def trace_begin(): + pass + +def trace_end(): + print_error_totals() + +def raw_syscalls__sys_exit(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, ret): + if for_comm is not None: + if common_comm != for_comm: + return + + if ret < 0: + try: + syscalls[common_comm][common_pid][id][ret] += 1 + except TypeError: + syscalls[common_comm][common_pid][id][ret] = 1 + +def print_error_totals(): + if for_comm is not None: + print "\nsyscall errors for %s:\n\n" % (for_comm), + else: + print "\nsyscall errors:\n\n", + + print "%-30s %10s\n" % ("comm [pid]", "count"), + print "%-30s %10s\n" % ("------------------------------", \ + "----------"), + + comm_keys = syscalls.keys() + for comm in comm_keys: + pid_keys = syscalls[comm].keys() + for pid in pid_keys: + print "\n%s [%d]\n" % (comm, pid), + id_keys = syscalls[comm][pid].keys() + for id in id_keys: + print " syscall: %-16d\n" % (id), + ret_keys = syscalls[comm][pid][id].keys() + for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True): + print " err = %-20d %10d\n" % (ret, val), diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py new file mode 100644 index 000000000000..af722d6a4b3f --- /dev/null +++ b/tools/perf/scripts/python/syscall-counts-by-pid.py @@ -0,0 +1,64 @@ +# system call counts, by pid +# (c) 2010, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 +# +# Displays system-wide system call totals, broken down by syscall. 
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed. + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; + +for_comm = None + +if len(sys.argv) > 2: + sys.exit(usage) + +if len(sys.argv) > 1: + for_comm = sys.argv[1] + +syscalls = autodict() + +def trace_begin(): + pass + +def trace_end(): + print_syscall_totals() + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + if for_comm is not None: + if common_comm != for_comm: + return + try: + syscalls[common_comm][common_pid][id] += 1 + except TypeError: + syscalls[common_comm][common_pid][id] = 1 + +def print_syscall_totals(): + if for_comm is not None: + print "\nsyscall events for %s:\n\n" % (for_comm), + else: + print "\nsyscall events by comm/pid:\n\n", + + print "%-40s %10s\n" % ("comm [pid]/syscalls", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "----------"), + + comm_keys = syscalls.keys() + for comm in comm_keys: + pid_keys = syscalls[comm].keys() + for pid in pid_keys: + print "\n%s [%d]\n" % (comm, pid), + id_keys = syscalls[comm][pid].keys() + for id, val in sorted(syscalls[comm][pid].iteritems(), \ + key = lambda(k, v): (v, k), reverse = True): + print " %-38d %10d\n" % (id, val), diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py new file mode 100644 index 000000000000..f977e85ff049 --- /dev/null +++ b/tools/perf/scripts/python/syscall-counts.py @@ -0,0 +1,58 @@ +# system call counts +# (c) 2010, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 +# +# Displays system-wide system call totals, broken down by syscall. +# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 
+ +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +usage = "perf trace -s syscall-counts.py [comm]\n"; + +for_comm = None + +if len(sys.argv) > 2: + sys.exit(usage) + +if len(sys.argv) > 1: + for_comm = sys.argv[1] + +syscalls = autodict() + +def trace_begin(): + pass + +def trace_end(): + print_syscall_totals() + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + if for_comm is not None: + if common_comm != for_comm: + return + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 + +def print_syscall_totals(): + if for_comm is not None: + print "\nsyscall events for %s:\n\n" % (for_comm), + else: + print "\nsyscall events:\n\n", + + print "%-40s %10s\n" % ("event", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "-----------"), + + for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ + reverse = True): + print "%-40d %10d\n" % (id, val), From 44ad9cd8f0893b9ae0ac729a7dc2a1ebcd170ac6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 22 Feb 2010 01:12:59 -0600 Subject: [PATCH 601/640] perf/scripts: Remove unnecessary PyTuple resizes If we know the size of a tuple in advance, there's no need to resize it - start out with the known size in the first place. 
Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1266822779.6426.4.camel@tropicana> Signed-off-by: Frederic Weisbecker --- .../util/scripting-engines/trace-event-python.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d402f64f9b46..33a414bbba3e 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -68,7 +68,7 @@ static void define_value(enum print_arg_type field_type, if (field_type == PRINT_SYMBOL) handler_name = "define_symbolic_value"; - t = PyTuple_New(MAX_FIELDS); + t = PyTuple_New(4); if (!t) Py_FatalError("couldn't create Python tuple"); @@ -79,9 +79,6 @@ static void define_value(enum print_arg_type field_type, PyTuple_SetItem(t, n++, PyInt_FromLong(value)); PyTuple_SetItem(t, n++, PyString_FromString(field_str)); - if (_PyTuple_Resize(&t, n) == -1) - Py_FatalError("error resizing Python tuple"); - handler = PyDict_GetItemString(main_dict, handler_name); if (handler && PyCallable_Check(handler)) { retval = PyObject_CallObject(handler, t); @@ -116,7 +113,10 @@ static void define_field(enum print_arg_type field_type, if (field_type == PRINT_SYMBOL) handler_name = "define_symbolic_field"; - t = PyTuple_New(MAX_FIELDS); + if (field_type == PRINT_FLAGS) + t = PyTuple_New(3); + else + t = PyTuple_New(2); if (!t) Py_FatalError("couldn't create Python tuple"); @@ -125,9 +125,6 @@ static void define_field(enum print_arg_type field_type, if (field_type == PRINT_FLAGS) PyTuple_SetItem(t, n++, PyString_FromString(delim)); - if (_PyTuple_Resize(&t, n) == -1) - Py_FatalError("error resizing Python tuple"); - handler = PyDict_GetItemString(main_dict, handler_name); if (handler && PyCallable_Check(handler)) { retval = 
PyObject_CallObject(handler, t); From cff68e582237cae3cf456f01153202175961dfbe Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 27 Jan 2010 02:28:03 -0600 Subject: [PATCH 602/640] perf/scripts: Add perf-trace-python Documentation Also small update to perf-trace-perl and perf-trace docs. Signed-off-by: Tom Zanussi Cc: Ingo Molnar Cc: Steven Rostedt Cc: Keiichi KII Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1264580883-15324-13-git-send-email-tzanussi@gmail.com> Signed-off-by: Frederic Weisbecker --- tools/perf/Documentation/perf-trace-perl.txt | 3 +- .../perf/Documentation/perf-trace-python.txt | 624 ++++++++++++++++++ tools/perf/Documentation/perf-trace.txt | 11 +- 3 files changed, 636 insertions(+), 2 deletions(-) create mode 100644 tools/perf/Documentation/perf-trace-python.txt diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt index c5f55f439091..d2206c3c7aa6 100644 --- a/tools/perf/Documentation/perf-trace-perl.txt +++ b/tools/perf/Documentation/perf-trace-perl.txt @@ -8,7 +8,7 @@ perf-trace-perl - Process trace data with a Perl script SYNOPSIS -------- [verse] -'perf trace' [-s [lang]:script[.ext] ] +'perf trace' [-s [Perl]:script[.pl] ] DESCRIPTION ----------- @@ -213,6 +213,7 @@ Various utility functions for use with perf trace: nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs nsecs_str($nsecs) - returns printable string in the form secs.nsecs avg($total, $n) - returns average given a sum and a total number of values + syscall_name($id) - returns the syscall name for the specified syscall_nr SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt new file mode 100644 index 000000000000..119d5deba1db --- /dev/null +++ b/tools/perf/Documentation/perf-trace-python.txt @@ -0,0 +1,624 @@ +perf-trace-python(1) +================== + +NAME +---- +perf-trace-python - Process trace data with 
a Python script + +SYNOPSIS +-------- +[verse] +'perf trace' [-s [Python]:script[.py] ] + +DESCRIPTION +----------- + +This perf trace option is used to process perf trace data using perf's +built-in Python interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Python script, if any. + +A QUICK EXAMPLE +--------------- + +This section shows the process, start to finish, of creating a working +Python script that aggregates and extracts useful information from a +raw perf trace stream. You can avoid reading the rest of this +document if an example is enough for you; the rest of the document +provides more details on each step and lists the library functions +available to script writers. + +This example actually details the steps that were used to create the +'syscall-counts' script you see when you list the available perf trace +scripts via 'perf trace -l'. As such, this script also shows how to +integrate your script into the list of general-purpose 'perf trace' +scripts listed by that command. + +The syscall-counts script is a simple script, but demonstrates all the +basic ideas necessary to create a useful script. Here's an example +of its output: + +---- +syscall events: + +event count +---------------------------------------- ----------- +sys_write 455067 +sys_getdents 4072 +sys_close 3037 +sys_swapoff 1769 +sys_read 923 +sys_sched_setparam 826 +sys_open 331 +sys_newfstat 326 +sys_mmap 217 +sys_munmap 216 +sys_futex 141 +sys_select 102 +sys_poll 84 +sys_setitimer 12 +sys_writev 8 +15 8 +sys_lseek 7 +sys_rt_sigprocmask 6 +sys_wait4 3 +sys_ioctl 3 +sys_set_robust_list 1 +sys_exit 1 +56 1 +sys_access 1 +---- + +Basically our task is to keep a per-syscall tally that gets updated +every time a system call occurs in the system. Our script will do +that, but first we need to record the data that will be processed by +that script. 
Theoretically, there are a couple of ways we could do +that: + +- we could enable every event under the tracing/events/syscalls + directory, but this is over 600 syscalls, well beyond the number + allowable by perf. These individual syscall events will however be + useful if we want to later use the guidance we get from the + general-purpose scripts to drill down and get more detail about + individual syscalls of interest. + +- we can enable the sys_enter and/or sys_exit syscalls found under + tracing/events/raw_syscalls. These are called for all syscalls; the + 'id' field can be used to distinguish between individual syscall + numbers. + +For this script, we only need to know that a syscall was entered; we +don't care how it exited, so we'll use 'perf record' to record only +the sys_enter events: + +---- +# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter + +^C[ perf record: Woken up 1 times to write data ] +[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ] +---- + +The options basically say to collect data for every syscall event +system-wide and multiplex the per-cpu output into a single stream. +That single stream will be recorded in a file in the current directory +called perf.data. + +Once we have a perf.data file containing our data, we can use the -g +'perf trace' option to generate a Python script that will contain a +callback handler for each event type found in the perf.data trace +stream (for more details, see the STARTER SCRIPTS section). + +---- +# perf trace -g python +generated Python script: perf-trace.py + +The output file created also in the current directory is named +perf-trace.py. Here's the file in its entirety: + +# perf trace event handlers, generated by perf trace -g python +# Licensed under the terms of the GNU GPL License version 2 + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the format files. 
Those fields not available as handler params can +# be retrieved using Python functions of the form common_*(context). +# See the perf-trace-python Documentation for the list of available functions. + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_begin(): + print "in trace_begin" + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + + print "id=%d, args=%s\n" % \ + (id, args), + +def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, + common_pid, common_comm): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), +---- + +At the top is a comment block followed by some import statements and a +path append which every perf trace script should include. + +Following that are a couple generated functions, trace_begin() and +trace_end(), which are called at the beginning and the end of the +script respectively (for more details, see the SCRIPT_LAYOUT section +below). + +Following those are the 'event handler' functions generated one for +every event in the 'perf record' output. The handler functions take +the form subsystem__event_name, and contain named parameters, one for +each field in the event; in this case, there's only one event, +raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for +more info on event handlers). + +The final couple of functions are, like the begin and end functions, +generated for every script. 
The first, trace_unhandled(), is called +every time the script finds an event in the perf.data file that +doesn't correspond to any event handler in the script. This could +mean either that the record step recorded event types that it wasn't +really interested in, or the script was run against a trace file that +doesn't correspond to the script. + +The script generated by the -g option simply prints a line for each +event found in the trace stream i.e. it basically just dumps the event +and its parameter values to stdout. The print_header() function is +simply a utility function used for that purpose. Let's rename the +script and run it to see the default output: + +---- +# mv perf-trace.py syscall-counts.py +# perf trace -s syscall-counts.py + +raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args= +raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args= +raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args= +. +. +. +---- + +Of course, for this script, we're not interested in printing every +trace event, but rather aggregating it in a useful way. So we'll get +rid of everything to do with printing as well as the trace_begin() and +trace_unhandled() functions, which we won't be using.
That leaves us +with this minimalistic skeleton: + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): +---- + +In trace_end(), we'll simply print the results, but first we need to +generate some results to print. To do that we need to have our +sys_enter() handler do the necessary tallying until all events have +been counted. A hash table indexed by syscall id is a good way to +store that information; every time the sys_enter() handler is called, +we simply increment a count associated with that hash entry indexed by +that syscall id: + +---- + syscalls = autodict() + + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 +---- + +The syscalls 'autodict' object is a special kind of Python dictionary +(implemented in Core.py) that implements Perl's 'autovivifying' hashes +in Python i.e. with autovivifying hashes, you can assign nested hash +values without having to go to the trouble of creating intermediate +levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create +the intermediate hash levels and finally assign the value 1 to the +hash entry for 'id' (because the value being assigned isn't a hash +object itself, the initial value is assigned in the TypeError +exception. Well, there may be a better way to do this in Python but +that's what works for now). + +Putting that code into the raw_syscalls__sys_enter() handler, we +effectively end up with a single-level dictionary keyed on syscall id +and having the counts we've tallied as values. 
+ +The print_syscall_totals() function iterates over the entries in the +dictionary and displays a line for each entry containing the syscall +name (the dictionary keys contain the syscall ids, which are passed to +the Util function syscall_name(), which translates the raw syscall +numbers to the corresponding syscall name strings). The output is +displayed after all the events in the trace have been processed, by +calling the print_syscall_totals() function from the trace_end() +handler called at the end of script processing. + +The final script producing the output shown above is shown in its +entirety below: + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +syscalls = autodict() + +def trace_end(): + print_syscall_totals() + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 + +def print_syscall_totals(): + if for_comm is not None: + print "\nsyscall events for %s:\n\n" % (for_comm), + else: + print "\nsyscall events:\n\n", + + print "%-40s %10s\n" % ("event", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "-----------"), + + for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ + reverse = True): + print "%-40s %10d\n" % (syscall_name(id), val), +---- + +The script can be run just as before: + + # perf trace -s syscall-counts.py + +So those are the essential steps in writing and running a script.
The +process can be generalized to any tracepoint or set of tracepoints +you're interested in - basically find the tracepoint(s) you're +interested in by looking at the list of available events shown by +'perf list' and/or look in /sys/kernel/debug/tracing/events for +detailed event and field info, record the corresponding trace data +using 'perf record', passing it the list of interesting events, +generate a skeleton script using 'perf trace -g python' and modify the +code to aggregate and display it for your particular needs. + +After you've done that you may end up with a general-purpose script +that you want to keep around and have available for future use. By +writing a couple of very simple shell scripts and putting them in the +right place, you can have your script listed alongside the other +scripts listed by the 'perf trace -l' command e.g.: + +---- +root@tropicana:~# perf trace -l +List of available trace scripts: + workqueue-stats workqueue stats (ins/exe/create/destroy) + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file r/w activity for a program, by file + rw-by-pid system-wide r/w activity +---- + +A nice side effect of doing this is that you also then capture the +probably lengthy 'perf record' command needed to record the events for +the script. + +To have the script appear as a 'built-in' script, you write two simple +scripts, one for recording and one for 'reporting'. + +The 'record' script is a shell script with the same base name as your +script, but with -record appended. The shell script should be put +into the perf/scripts/python/bin directory in the kernel source tree. +In that script, you write the 'perf record' command-line needed for +your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record + +#!/bin/bash +perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter +---- + +The 'report' script is also a shell script with the same base name as +your script, but with -report appended.
It should also be located in +the perf/scripts/python/bin directory. In that script, you write the +'perf trace -s' command-line needed for running your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report + +#!/bin/bash +# description: system-wide syscall counts +perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py +---- + +Note that the location of the Python script given in the shell script +is in the libexec/perf-core/scripts/python directory - this is where +the script will be copied by 'make install' when you install perf. +For the installation to install your script there, your script needs +to be located in the perf/scripts/python directory in the kernel +source tree: + +---- +# ls -al kernel-source/tools/perf/scripts/python + +root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python +total 32 +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. +drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin +-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util +-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py +---- + +Once you've done that (don't forget to do a new 'make install', +otherwise your script won't show up at run-time), 'perf trace -l' +should show a new entry for your script: + +---- +root@tropicana:~# perf trace -l +List of available trace scripts: + workqueue-stats workqueue stats (ins/exe/create/destroy) + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file r/w activity for a program, by file + rw-by-pid system-wide r/w activity + syscall-counts system-wide syscall counts +---- + +You can now perform the record step via 'perf trace record': + + # perf trace record syscall-counts + +and display the output using 'perf trace report': + + # perf trace report syscall-counts + +STARTER SCRIPTS +--------------- + +You can quickly get started writing a script for a 
particular set of +trace data by generating a skeleton script using 'perf trace -g +python' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/python for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-trace.py script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf trace is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_unhandled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -c 1 -f -a -M -R -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above options: -c 1 says to sample every event, -a to enable +system-wide collection, -M to multiplex the output, and -R to collect +raw samples.
+ +The format file for the sched_wakeup event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + field:int common_lock_depth; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +def sched__sched_wakeup(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm, + comm, pid, prio, success, target_cpu): + pass +---- + +The handler function takes the form subsystem__event_name. + +The common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + event_name the name of the event as text + context an opaque 'cookie' used in calls back into perf + common_cpu the cpu the event occurred on + common_secs the secs portion of the event timestamp + common_nsecs the nsecs portion of the event timestamp + common_pid the pid of the current task + common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest.
+ +SCRIPT LAYOUT +------------- + +Every perf trace Python script should start by setting up a Python +module search path and 'import'ing a few support modules (see module +descriptions below): + +---- + import os + import sys + + sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + + from perf_trace_context import * + from Core import * +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- +def trace_begin(): + pass +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as displaying results: + +---- +def trace_end(): + pass +---- + +*trace_unhandled*, if defined, is called for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments is passed into it: + +---- +def trace_unhandled(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm): + pass +---- + +The remaining sections provide descriptions of each of the available +built-in perf trace Python modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various perf trace Python modules. To use the functions and +variables from the given module, add the corresponding 'from XXXX +import' line to your perf trace script. + +Core.py Module +~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields.
These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name + symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name + +The *autodict* function returns a special kind of Python +dictionary that implements Perl's 'autovivifying' hashes in Python +i.e. with autovivifying hashes, you can assign nested hash values +without having to go to the trouble of creating intermediate levels if +they don't exist. + + autodict() - returns an autovivifying dictionary instance + + +perf_trace_context Module +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +perf_trace_context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a context variable, which is the same as the +context variable passed into every event handler as the second +argument.
+ + common_pc(context) - returns common_preempt count for the current event + common_flags(context) - returns common_flags for the current event + common_lock_depth(context) - returns common_lock_depth for the current event + +Util.py Module +~~~~~~~~~~~~~~ + +Various utility functions for use with perf trace: + + nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs(nsecs) - returns whole secs portion given nsecs + nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs + nsecs_str(nsecs) - returns printable string in the form secs.nsecs + avg(total, n) - returns average given a sum and a total number of values + syscall_name(id) - returns the syscall name for the specified syscall_nr + +SEE ALSO +-------- +linkperf:perf-trace[1] diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index c00a76fcb8d6..8879299cd9df 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -19,6 +19,11 @@ There are several variants of perf trace: 'perf trace' to see a detailed trace of the workload that was recorded. + You can also run a set of pre-canned scripts that aggregate and + summarize the raw trace data in various ways (the list of scripts is + available via 'perf trace -l'). The following variants allow you to + record and run those scripts: + 'perf trace record