From 3912f2abc942a002ef611fc973add5e5eadb3432 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 14 Dec 2009 03:23:56 -0500
Subject: [PATCH 001/640] perf: Use format string of printf to align strings

Instead of filling whitespaces to do alignment, use
printf's format string.

This simplifies the code a bit.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20091214082700.4224.57640.sendpatchset@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-help.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 9f810b17c25c..e427d6965e0c 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -286,8 +286,7 @@ void list_common_cmds_help(void)
 
 	puts(" The most commonly used perf commands are:");
 	for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
-		printf("   %s   ", common_cmds[i].name);
-		mput_char(' ', longest - strlen(common_cmds[i].name));
+		printf("   %-*s   ", longest, common_cmds[i].name);
 		puts(common_cmds[i].help);
 	}
 }

From 06aae590033d1ae3c35b2920ef950cfc603e2a2d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:36:59 -0200
Subject: [PATCH 002/640] perf session: Move the event processing routines to
 session.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need for an extra "data_map" file since the routines there
operate mainly on a perf_session instance.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile        |   1 -
 tools/perf/util/data_map.c | 252 -------------------------------------
 tools/perf/util/session.c  | 245 ++++++++++++++++++++++++++++++++++++
 3 files changed, 245 insertions(+), 253 deletions(-)
 delete mode 100644 tools/perf/util/data_map.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 652a470b5f74..4172c3b0e4a7 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -423,7 +423,6 @@ LIB_OBJS += util/trace-event-perl.o
 LIB_OBJS += util/svghelper.o
 LIB_OBJS += util/sort.o
 LIB_OBJS += util/hist.o
-LIB_OBJS += util/data_map.o
 LIB_OBJS += util/probe-event.o
 
 BUILTIN_OBJS += builtin-annotate.o
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
deleted file mode 100644
index b557b836de3d..000000000000
--- a/tools/perf/util/data_map.c
+++ /dev/null
@@ -1,252 +0,0 @@
-#include "symbol.h"
-#include "util.h"
-#include "debug.h"
-#include "thread.h"
-#include "session.h"
-
-static int process_event_stub(event_t *event __used,
-			      struct perf_session *session __used)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
-static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
-{
-	if (!handler->process_sample_event)
-		handler->process_sample_event = process_event_stub;
-	if (!handler->process_mmap_event)
-		handler->process_mmap_event = process_event_stub;
-	if (!handler->process_comm_event)
-		handler->process_comm_event = process_event_stub;
-	if (!handler->process_fork_event)
-		handler->process_fork_event = process_event_stub;
-	if (!handler->process_exit_event)
-		handler->process_exit_event = process_event_stub;
-	if (!handler->process_lost_event)
-		handler->process_lost_event = process_event_stub;
-	if (!handler->process_read_event)
-		handler->process_read_event = process_event_stub;
-	if (!handler->process_throttle_event)
-		handler->process_throttle_event = process_event_stub;
-	if (!handler->process_unthrottle_event)
-		handler->process_unthrottle_event = process_event_stub;
-}
-
-static const char *event__name[] = {
-	[0]			 = "TOTAL",
-	[PERF_RECORD_MMAP]	 = "MMAP",
-	[PERF_RECORD_LOST]	 = "LOST",
-	[PERF_RECORD_COMM]	 = "COMM",
-	[PERF_RECORD_EXIT]	 = "EXIT",
-	[PERF_RECORD_THROTTLE]	 = "THROTTLE",
-	[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
-	[PERF_RECORD_FORK]	 = "FORK",
-	[PERF_RECORD_READ]	 = "READ",
-	[PERF_RECORD_SAMPLE]	 = "SAMPLE",
-};
-
-unsigned long event__total[PERF_RECORD_MAX];
-
-void event__print_totals(void)
-{
-	int i;
-	for (i = 0; i < PERF_RECORD_MAX; ++i)
-		pr_info("%10s events: %10ld\n",
-			event__name[i], event__total[i]);
-}
-
-static int process_event(event_t *event, struct perf_session *session,
-			 struct perf_event_ops *ops,
-			 unsigned long offset, unsigned long head)
-{
-	trace_event(event);
-
-	if (event->header.type < PERF_RECORD_MAX) {
-		dump_printf("%p [%p]: PERF_RECORD_%s",
-			    (void *)(offset + head),
-			    (void *)(long)(event->header.size),
-			    event__name[event->header.type]);
-		++event__total[0];
-		++event__total[event->header.type];
-	}
-
-	switch (event->header.type) {
-	case PERF_RECORD_SAMPLE:
-		return ops->process_sample_event(event, session);
-	case PERF_RECORD_MMAP:
-		return ops->process_mmap_event(event, session);
-	case PERF_RECORD_COMM:
-		return ops->process_comm_event(event, session);
-	case PERF_RECORD_FORK:
-		return ops->process_fork_event(event, session);
-	case PERF_RECORD_EXIT:
-		return ops->process_exit_event(event, session);
-	case PERF_RECORD_LOST:
-		return ops->process_lost_event(event, session);
-	case PERF_RECORD_READ:
-		return ops->process_read_event(event, session);
-	case PERF_RECORD_THROTTLE:
-		return ops->process_throttle_event(event, session);
-	case PERF_RECORD_UNTHROTTLE:
-		return ops->process_unthrottle_event(event, session);
-	default:
-		ops->total_unknown++;
-		return -1;
-	}
-}
-
-int perf_header__read_build_ids(int input, u64 offset, u64 size)
-{
-	struct build_id_event bev;
-	char filename[PATH_MAX];
-	u64 limit = offset + size;
-	int err = -1;
-
-	while (offset < limit) {
-		struct dso *dso;
-		ssize_t len;
-
-		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
-			goto out;
-
-		len = bev.header.size - sizeof(bev);
-		if (read(input, filename, len) != len)
-			goto out;
-
-		dso = dsos__findnew(filename);
-		if (dso != NULL)
-			dso__set_build_id(dso, &bev.build_id);
-
-		offset += bev.header.size;
-	}
-	err = 0;
-out:
-	return err;
-}
-
-static struct thread *perf_session__register_idle_thread(struct perf_session *self)
-{
-	struct thread *thread = perf_session__findnew(self, 0);
-
-	if (!thread || thread__set_comm(thread, "swapper")) {
-		pr_err("problem inserting idle task.\n");
-		thread = NULL;
-	}
-
-	return thread;
-}
-
-int perf_session__process_events(struct perf_session *self,
-				 struct perf_event_ops *ops)
-{
-	int err;
-	unsigned long head, shift;
-	unsigned long offset = 0;
-	size_t	page_size;
-	event_t *event;
-	uint32_t size;
-	char *buf;
-
-	if (perf_session__register_idle_thread(self) == NULL)
-		return -ENOMEM;
-
-	perf_event_ops__fill_defaults(ops);
-
-	page_size = getpagesize();
-
-	head = self->header.data_offset;
-	self->sample_type = perf_header__sample_type(&self->header);
-
-	err = -EINVAL;
-	if (ops->sample_type_check && ops->sample_type_check(self) < 0)
-		goto out_err;
-
-	if (!ops->full_paths) {
-		char bf[PATH_MAX];
-
-		if (getcwd(bf, sizeof(bf)) == NULL) {
-			err = -errno;
-out_getcwd_err:
-			pr_err("failed to get the current directory\n");
-			goto out_err;
-		}
-		self->cwd = strdup(bf);
-		if (self->cwd == NULL) {
-			err = -ENOMEM;
-			goto out_getcwd_err;
-		}
-		self->cwdlen = strlen(self->cwd);
-	}
-
-	shift = page_size * (head / page_size);
-	offset += shift;
-	head -= shift;
-
-remap:
-	buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
-		   MAP_SHARED, self->fd, offset);
-	if (buf == MAP_FAILED) {
-		pr_err("failed to mmap file\n");
-		err = -errno;
-		goto out_err;
-	}
-
-more:
-	event = (event_t *)(buf + head);
-
-	size = event->header.size;
-	if (!size)
-		size = 8;
-
-	if (head + event->header.size >= page_size * self->mmap_window) {
-		int munmap_ret;
-
-		shift = page_size * (head / page_size);
-
-		munmap_ret = munmap(buf, page_size * self->mmap_window);
-		assert(munmap_ret == 0);
-
-		offset += shift;
-		head -= shift;
-		goto remap;
-	}
-
-	size = event->header.size;
-
-	dump_printf("\n%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
-
-	if (!size || process_event(event, self, ops, offset, head) < 0) {
-
-		dump_printf("%p [%p]: skipping unknown header type: %d\n",
-			(void *)(offset + head),
-			(void *)(long)(event->header.size),
-			event->header.type);
-
-		/*
-		 * assume we lost track of the stream, check alignment, and
-		 * increment a single u64 in the hope to catch on again 'soon'.
-		 */
-
-		if (unlikely(head & 7))
-			head &= ~7ULL;
-
-		size = 8;
-	}
-
-	head += size;
-
-	if (offset + head >= self->header.data_offset + self->header.data_size)
-		goto done;
-
-	if (offset + head < self->size)
-		goto more;
-
-done:
-	err = 0;
-out_err:
-	return err;
-}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce3a6c8abe76..736d4fda9272 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -148,3 +148,248 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
 
 	return syms;
 }
+
+static int process_event_stub(event_t *event __used,
+			      struct perf_session *session __used)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
+{
+	if (handler->process_sample_event == NULL)
+		handler->process_sample_event = process_event_stub;
+	if (handler->process_mmap_event == NULL)
+		handler->process_mmap_event = process_event_stub;
+	if (handler->process_comm_event == NULL)
+		handler->process_comm_event = process_event_stub;
+	if (handler->process_fork_event == NULL)
+		handler->process_fork_event = process_event_stub;
+	if (handler->process_exit_event == NULL)
+		handler->process_exit_event = process_event_stub;
+	if (handler->process_lost_event == NULL)
+		handler->process_lost_event = process_event_stub;
+	if (handler->process_read_event == NULL)
+		handler->process_read_event = process_event_stub;
+	if (handler->process_throttle_event == NULL)
+		handler->process_throttle_event = process_event_stub;
+	if (handler->process_unthrottle_event == NULL)
+		handler->process_unthrottle_event = process_event_stub;
+}
+
+static const char *event__name[] = {
+	[0]			 = "TOTAL",
+	[PERF_RECORD_MMAP]	 = "MMAP",
+	[PERF_RECORD_LOST]	 = "LOST",
+	[PERF_RECORD_COMM]	 = "COMM",
+	[PERF_RECORD_EXIT]	 = "EXIT",
+	[PERF_RECORD_THROTTLE]	 = "THROTTLE",
+	[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+	[PERF_RECORD_FORK]	 = "FORK",
+	[PERF_RECORD_READ]	 = "READ",
+	[PERF_RECORD_SAMPLE]	 = "SAMPLE",
+};
+
+unsigned long event__total[PERF_RECORD_MAX];
+
+void event__print_totals(void)
+{
+	int i;
+	for (i = 0; i < PERF_RECORD_MAX; ++i)
+		pr_info("%10s events: %10ld\n",
+			event__name[i], event__total[i]);
+}
+
+static int perf_session__process_event(struct perf_session *self,
+				       event_t *event,
+				       struct perf_event_ops *ops,
+				       unsigned long offset, unsigned long head)
+{
+	trace_event(event);
+
+	if (event->header.type < PERF_RECORD_MAX) {
+		dump_printf("%p [%p]: PERF_RECORD_%s",
+			    (void *)(offset + head),
+			    (void *)(long)(event->header.size),
+			    event__name[event->header.type]);
+		++event__total[0];
+		++event__total[event->header.type];
+	}
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		return ops->process_sample_event(event, self);
+	case PERF_RECORD_MMAP:
+		return ops->process_mmap_event(event, self);
+	case PERF_RECORD_COMM:
+		return ops->process_comm_event(event, self);
+	case PERF_RECORD_FORK:
+		return ops->process_fork_event(event, self);
+	case PERF_RECORD_EXIT:
+		return ops->process_exit_event(event, self);
+	case PERF_RECORD_LOST:
+		return ops->process_lost_event(event, self);
+	case PERF_RECORD_READ:
+		return ops->process_read_event(event, self);
+	case PERF_RECORD_THROTTLE:
+		return ops->process_throttle_event(event, self);
+	case PERF_RECORD_UNTHROTTLE:
+		return ops->process_unthrottle_event(event, self);
+	default:
+		ops->total_unknown++;
+		return -1;
+	}
+}
+
+int perf_header__read_build_ids(int input, u64 offset, u64 size)
+{
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size;
+	int err = -1;
+
+	while (offset < limit) {
+		struct dso *dso;
+		ssize_t len;
+
+		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+			goto out;
+
+		len = bev.header.size - sizeof(bev);
+		if (read(input, filename, len) != len)
+			goto out;
+
+		dso = dsos__findnew(filename);
+		if (dso != NULL)
+			dso__set_build_id(dso, &bev.build_id);
+
+		offset += bev.header.size;
+	}
+	err = 0;
+out:
+	return err;
+}
+
+static struct thread *perf_session__register_idle_thread(struct perf_session *self)
+{
+	struct thread *thread = perf_session__findnew(self, 0);
+
+	if (thread == NULL || thread__set_comm(thread, "swapper")) {
+		pr_err("problem inserting idle task.\n");
+		thread = NULL;
+	}
+
+	return thread;
+}
+
+int perf_session__process_events(struct perf_session *self,
+				 struct perf_event_ops *ops)
+{
+	int err;
+	unsigned long head, shift;
+	unsigned long offset = 0;
+	size_t	page_size;
+	event_t *event;
+	uint32_t size;
+	char *buf;
+
+	if (perf_session__register_idle_thread(self) == NULL)
+		return -ENOMEM;
+
+	perf_event_ops__fill_defaults(ops);
+
+	page_size = getpagesize();
+
+	head = self->header.data_offset;
+	self->sample_type = perf_header__sample_type(&self->header);
+
+	err = -EINVAL;
+	if (ops->sample_type_check && ops->sample_type_check(self) < 0)
+		goto out_err;
+
+	if (!ops->full_paths) {
+		char bf[PATH_MAX];
+
+		if (getcwd(bf, sizeof(bf)) == NULL) {
+			err = -errno;
+out_getcwd_err:
+			pr_err("failed to get the current directory\n");
+			goto out_err;
+		}
+		self->cwd = strdup(bf);
+		if (self->cwd == NULL) {
+			err = -ENOMEM;
+			goto out_getcwd_err;
+		}
+		self->cwdlen = strlen(self->cwd);
+	}
+
+	shift = page_size * (head / page_size);
+	offset += shift;
+	head -= shift;
+
+remap:
+	buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
+		   MAP_SHARED, self->fd, offset);
+	if (buf == MAP_FAILED) {
+		pr_err("failed to mmap file\n");
+		err = -errno;
+		goto out_err;
+	}
+
+more:
+	event = (event_t *)(buf + head);
+
+	size = event->header.size;
+	if (size == 0)
+		size = 8;
+
+	if (head + event->header.size >= page_size * self->mmap_window) {
+		int munmap_ret;
+
+		shift = page_size * (head / page_size);
+
+		munmap_ret = munmap(buf, page_size * self->mmap_window);
+		assert(munmap_ret == 0);
+
+		offset += shift;
+		head -= shift;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	dump_printf("\n%p [%p]: event: %d\n",
+			(void *)(offset + head),
+			(void *)(long)event->header.size,
+			event->header.type);
+
+	if (size == 0 ||
+	    perf_session__process_event(self, event, ops, offset, head) < 0) {
+		dump_printf("%p [%p]: skipping unknown header type: %d\n",
+			    (void *)(offset + head),
+			    (void *)(long)(event->header.size),
+			    event->header.type);
+		/*
+		 * assume we lost track of the stream, check alignment, and
+		 * increment a single u64 in the hope to catch on again 'soon'.
+		 */
+		if (unlikely(head & 7))
+			head &= ~7ULL;
+
+		size = 8;
+	}
+
+	head += size;
+
+	if (offset + head >= self->header.data_offset + self->header.data_size)
+		goto done;
+
+	if (offset + head < self->size)
+		goto more;
+done:
+	err = 0;
+out_err:
+	return err;
+}

From 4a58e61161074776aa34187ea369414ce4852394 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:00 -0200
Subject: [PATCH 003/640] perf tools: Move the map class definition to a
 separate header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And this resulted in the need for adding some missing includes
in some places that were getting the definitions needed out of
sheer luck.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-4-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile            |  1 +
 tools/perf/util/debug.c        |  1 +
 tools/perf/util/event.h        | 65 ++----------------------------
 tools/perf/util/map.h          | 73 ++++++++++++++++++++++++++++++++++
 tools/perf/util/probe-finder.h |  2 +
 5 files changed, 80 insertions(+), 62 deletions(-)
 create mode 100644 tools/perf/util/map.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 4172c3b0e4a7..fafea0b6f323 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -357,6 +357,7 @@ LIB_H += util/event.h
 LIB_H += util/exec_cmd.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
+LIB_H += util/map.h
 LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
 LIB_H += util/quote.h
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 28d520d5a1fb..0905600c3851 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -9,6 +9,7 @@
 #include "color.h"
 #include "event.h"
 #include "debug.h"
+#include "util.h"
 
 int verbose = 0;
 int dump_trace = 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 690a96d0467c..80fb3653c809 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,10 +1,10 @@
 #ifndef __PERF_RECORD_H
 #define __PERF_RECORD_H
 
+#include <limits.h>
+
 #include "../perf.h"
-#include "util.h"
-#include <linux/list.h>
-#include <linux/rbtree.h>
+#include "map.h"
 
 /*
  * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -101,67 +101,8 @@ struct events_stats {
 
 void event__print_totals(void);
 
-enum map_type {
-	MAP__FUNCTION = 0,
-	MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-struct map {
-	union {
-		struct rb_node	rb_node;
-		struct list_head node;
-	};
-	u64			start;
-	u64			end;
-	enum map_type		type;
-	u64			pgoff;
-	u64			(*map_ip)(struct map *, u64);
-	u64			(*unmap_ip)(struct map *, u64);
-	struct dso		*dso;
-};
-
-static inline u64 map__map_ip(struct map *map, u64 ip)
-{
-	return ip - map->start + map->pgoff;
-}
-
-static inline u64 map__unmap_ip(struct map *map, u64 ip)
-{
-	return ip + map->start - map->pgoff;
-}
-
-static inline u64 identity__map_ip(struct map *map __used, u64 ip)
-{
-	return ip;
-}
-
-struct symbol;
-
-typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
-
-void map__init(struct map *self, enum map_type type,
-	       u64 start, u64 end, u64 pgoff, struct dso *dso);
-struct map *map__new(struct mmap_event *event, enum map_type,
-		     char *cwd, int cwdlen);
-void map__delete(struct map *self);
-struct map *map__clone(struct map *self);
-int map__overlap(struct map *l, struct map *r);
-size_t map__fprintf(struct map *self, FILE *fp);
-
 struct perf_session;
 
-int map__load(struct map *self, struct perf_session *session,
-	      symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
-				u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
-					struct perf_session *session,
-					symbol_filter_t filter);
-void map__fixup_start(struct map *self);
-void map__fixup_end(struct map *self);
-
 int event__synthesize_thread(pid_t pid,
 			     int (*process)(event_t *event,
 					    struct perf_session *session),
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000000..72f0b6ab5ea5
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,73 @@
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+enum map_type {
+	MAP__FUNCTION = 0,
+	MAP__VARIABLE,
+};
+
+#define MAP__NR_TYPES (MAP__VARIABLE + 1)
+
+struct dso;
+
+struct map {
+	union {
+		struct rb_node	rb_node;
+		struct list_head node;
+	};
+	u64			start;
+	u64			end;
+	enum map_type		type;
+	u64			pgoff;
+	u64			(*map_ip)(struct map *, u64);
+	u64			(*unmap_ip)(struct map *, u64);
+	struct dso		*dso;
+};
+
+static inline u64 map__map_ip(struct map *map, u64 ip)
+{
+	return ip - map->start + map->pgoff;
+}
+
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{
+	return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __used, u64 ip)
+{
+	return ip;
+}
+
+struct symbol;
+struct mmap_event;
+
+typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+
+void map__init(struct map *self, enum map_type type,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct mmap_event *event, enum map_type,
+		     char *cwd, int cwdlen);
+void map__delete(struct map *self);
+struct map *map__clone(struct map *self);
+int map__overlap(struct map *l, struct map *r);
+size_t map__fprintf(struct map *self, FILE *fp);
+
+struct perf_session;
+
+int map__load(struct map *self, struct perf_session *session,
+	      symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
+				u64 addr, symbol_filter_t filter);
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+					struct perf_session *session,
+					symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
+
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index a4086aaddb73..e3f396806e6e 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -1,6 +1,8 @@
 #ifndef _PROBE_FINDER_H
 #define _PROBE_FINDER_H
 
+#include "util.h"
+
 #define MAX_PATH_LEN		 256
 #define MAX_PROBE_BUFFER	1024
 #define MAX_PROBES		 128

From 27295592c22e71bbd38110c302da8dbb43912a60 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:01 -0200
Subject: [PATCH 004/640] perf session: Share the common trace sample_check
 routine as perf_session__has_traces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-5-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c      | 14 +-------------
 tools/perf/builtin-sched.c     | 14 +-------------
 tools/perf/builtin-timechart.c | 13 +------------
 tools/perf/builtin-trace.c     | 14 +-------------
 tools/perf/util/session.c      | 11 +++++++++++
 tools/perf/util/session.h      |  2 ++
 6 files changed, 17 insertions(+), 51 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fc21ad79dd83..a85936f09f3e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -342,22 +342,10 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	return 0;
 }
 
-static int sample_type_check(struct perf_session *session)
-{
-	if (!(session->sample_type & PERF_SAMPLE_RAW)) {
-		fprintf(stderr,
-			"No trace sample to read. Did you call perf record "
-			"without -R?");
-		return -1;
-	}
-
-	return 0;
-}
-
 static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
-	.sample_type_check	= sample_type_check,
+	.sample_type_check	= perf_session__has_traces,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 80209df6cfe8..d65098c42990 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1653,23 +1653,11 @@ static int process_lost_event(event_t *event __used,
 	return 0;
 }
 
-static int sample_type_check(struct perf_session *session __used)
-{
-	if (!(session->sample_type & PERF_SAMPLE_RAW)) {
-		fprintf(stderr,
-			"No trace sample to read. Did you call perf record "
-			"without -R?");
-		return -1;
-	}
-
-	return 0;
-}
-
 static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
 	.process_lost_event	= process_lost_event,
-	.sample_type_check	= sample_type_check,
+	.sample_type_check	= perf_session__has_traces,
 };
 
 static int read_events(void)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index a589a43112d6..b42f337c17d9 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1029,23 +1029,12 @@ static void process_samples(struct perf_session *session)
 	}
 }
 
-static int sample_type_check(struct perf_session *session)
-{
-	if (!(session->sample_type & PERF_SAMPLE_RAW)) {
-		fprintf(stderr, "No trace samples found in the file.\n"
-				"Have you used 'perf timechart record' to record it?\n");
-		return -1;
-	}
-
-	return 0;
-}
-
 static struct perf_event_ops event_ops = {
 	.process_comm_event	= process_comm_event,
 	.process_fork_event	= process_fork_event,
 	.process_exit_event	= process_exit_event,
 	.process_sample_event	= queue_sample_event,
-	.sample_type_check	= sample_type_check,
+	.sample_type_check	= perf_session__has_traces,
 };
 
 static int __cmd_timechart(void)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 574a215e800b..b0ba2ac37e2c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -103,22 +103,10 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	return 0;
 }
 
-static int sample_type_check(struct perf_session *session)
-{
-	if (!(session->sample_type & PERF_SAMPLE_RAW)) {
-		fprintf(stderr,
-			"No trace sample to read. Did you call perf record "
-			"without -R?");
-		return -1;
-	}
-
-	return 0;
-}
-
 static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
-	.sample_type_check	= sample_type_check,
+	.sample_type_check	= perf_session__has_traces,
 };
 
 static int __cmd_trace(struct perf_session *session)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 736d4fda9272..60eab8b3ff34 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -393,3 +393,14 @@ done:
 out_err:
 	return err;
 }
+
+int perf_session__has_traces(struct perf_session *self)
+{
+	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
+		pr_err("No trace sample to read. Did you call perf record "
+		       "without -R?");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 32eaa1bada06..a6951d2f700f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -56,6 +56,8 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
 						struct ip_callchain *chain,
 						struct symbol **parent);
 
+int perf_session__has_traces(struct perf_session *self);
+
 int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
 
 #endif /* __PERF_SESSION_H */

From d549c7690190d9739005e19604faad6da4b802ac Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:02 -0200
Subject: [PATCH 005/640] perf session: Remove sample_type_check from event_ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is really something tools need to do before asking for the
events to be processed, leaving perf_session__process_events to
do just that, process events.

Also add a msg parameter to perf_session__has_traces() so that
the right message can be printed, fixing a regression added by
me in the previous cset (right timechart message) and also
fixing 'perf kmem', that was not asking if 'perf kmem record'
was ran.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-6-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c      |  6 ++++--
 tools/perf/builtin-report.c    | 16 +++++++++-------
 tools/perf/builtin-sched.c     |  7 ++++---
 tools/perf/builtin-timechart.c |  6 ++++--
 tools/perf/builtin-trace.c     |  4 +++-
 tools/perf/util/session.c      | 16 ++++++----------
 tools/perf/util/session.h      |  3 +--
 7 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a85936f09f3e..73b065022e27 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -345,7 +345,6 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
-	.sample_type_check	= perf_session__has_traces,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -492,11 +491,14 @@ static void sort_result(void)
 
 static int __cmd_kmem(void)
 {
-	int err;
+	int err = -EINVAL;
 	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (!perf_session__has_traces(session, "kmem record"))
+		goto out_delete;
+
 	setup_pager();
 	err = perf_session__process_events(session, &event_ops);
 	if (err != 0)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index db10c0e8ecae..08259184cedb 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -156,14 +156,14 @@ static int process_read_event(event_t *event, struct perf_session *session __use
 	return 0;
 }
 
-static int sample_type_check(struct perf_session *session)
+static int perf_session__setup_sample_type(struct perf_session *self)
 {
-	if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+	if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			fprintf(stderr, "selected --sort parent, but no"
 					" callchain data. Did you call"
 					" perf record without -g?\n");
-			return -1;
+			return -EINVAL;
 		}
 		if (symbol_conf.use_callchain) {
 			fprintf(stderr, "selected -g but no callchain data."
@@ -176,7 +176,7 @@ static int sample_type_check(struct perf_session *session)
 			if (register_callchain_param(&callchain_param) < 0) {
 				fprintf(stderr, "Can't register callchain"
 						" params\n");
-				return -1;
+				return -EINVAL;
 			}
 	}
 
@@ -191,13 +191,11 @@ static struct perf_event_ops event_ops = {
 	.process_fork_event	= event__process_task,
 	.process_lost_event	= event__process_lost,
 	.process_read_event	= process_read_event,
-	.sample_type_check	= sample_type_check,
 };
 
-
 static int __cmd_report(void)
 {
-	int ret;
+	int ret = -EINVAL;
 	struct perf_session *session;
 
 	session = perf_session__new(input_name, O_RDONLY, force);
@@ -207,6 +205,10 @@ static int __cmd_report(void)
 	if (show_threads)
 		perf_read_values_init(&show_threads_values);
 
+	ret = perf_session__setup_sample_type(session);
+	if (ret)
+		goto out_delete;
+
 	ret = perf_session__process_events(session, &event_ops);
 	if (ret)
 		goto out_delete;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d65098c42990..e862e71f4e68 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1657,17 +1657,18 @@ static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
 	.process_lost_event	= process_lost_event,
-	.sample_type_check	= perf_session__has_traces,
 };
 
 static int read_events(void)
 {
-	int err;
+	int err = -EINVAL;
 	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
 	if (session == NULL)
 		return -ENOMEM;
 
-	err = perf_session__process_events(session, &event_ops);
+	if (perf_session__has_traces(session, "record -R"))
+		err = perf_session__process_events(session, &event_ops);
+
 	perf_session__delete(session);
 	return err;
 }
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index b42f337c17d9..825283794985 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1034,17 +1034,19 @@ static struct perf_event_ops event_ops = {
 	.process_fork_event	= process_fork_event,
 	.process_exit_event	= process_exit_event,
 	.process_sample_event	= queue_sample_event,
-	.sample_type_check	= perf_session__has_traces,
 };
 
 static int __cmd_timechart(void)
 {
 	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
-	int ret;
+	int ret = -EINVAL;
 
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (!perf_session__has_traces(session, "timechart record"))
+		goto out_delete;
+
 	ret = perf_session__process_events(session, &event_ops);
 	if (ret)
 		goto out_delete;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b0ba2ac37e2c..e94f34631585 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -106,7 +106,6 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 static struct perf_event_ops event_ops = {
 	.process_sample_event	= process_sample_event,
 	.process_comm_event	= event__process_comm,
-	.sample_type_check	= perf_session__has_traces,
 };
 
 static int __cmd_trace(struct perf_session *session)
@@ -580,6 +579,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (!perf_session__has_traces(session, "record -R"))
+		return -EINVAL;
+
 	if (generate_script_lang) {
 		struct stat perf_stat;
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 60eab8b3ff34..bc84a5217955 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -73,6 +73,8 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 
 	if (mode == O_RDONLY && perf_session__open(self, force) < 0)
 		goto out_delete;
+
+	self->sample_type = perf_header__sample_type(&self->header);
 out:
 	return self;
 out_free:
@@ -302,11 +304,6 @@ int perf_session__process_events(struct perf_session *self,
 	page_size = getpagesize();
 
 	head = self->header.data_offset;
-	self->sample_type = perf_header__sample_type(&self->header);
-
-	err = -EINVAL;
-	if (ops->sample_type_check && ops->sample_type_check(self) < 0)
-		goto out_err;
 
 	if (!ops->full_paths) {
 		char bf[PATH_MAX];
@@ -394,13 +391,12 @@ out_err:
 	return err;
 }
 
-int perf_session__has_traces(struct perf_session *self)
+bool perf_session__has_traces(struct perf_session *self, const char *msg)
 {
 	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
-		pr_err("No trace sample to read. Did you call perf record "
-		       "without -R?");
-		return -1;
+		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+		return false;
 	}
 
-	return 0;
+	return true;
 }
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index a6951d2f700f..5771ccb3fe03 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -40,7 +40,6 @@ struct perf_event_ops {
 	event_op	process_read_event;
 	event_op	process_throttle_event;
 	event_op	process_unthrottle_event;
-	int		(*sample_type_check)(struct perf_session *session);
 	unsigned long	total_unknown;
 	bool		full_paths;
 };
@@ -56,7 +55,7 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
 						struct ip_callchain *chain,
 						struct symbol **parent);
 
-int perf_session__has_traces(struct perf_session *self);
+bool perf_session__has_traces(struct perf_session *self, const char *msg);
 
 int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
 

From 31d337c4ee3152b7271897eae576251643f5a3b5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:03 -0200
Subject: [PATCH 006/640] perf session: Move total_unknown to
 perf_session->unknown events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As this is a session property, not belonging to perf_event_ops,
that can be shared by many perf_session instances.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-7-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/session.c | 3 ++-
 tools/perf/util/session.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index bc84a5217955..4ca427f73994 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -66,6 +66,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->mmap_window = 32;
 	self->cwd = NULL;
 	self->cwdlen = 0;
+	self->unknown_events = 0;
 	map_groups__init(&self->kmaps);
 
 	if (perf_session__create_kernel_maps(self) < 0)
@@ -239,7 +240,7 @@ static int perf_session__process_event(struct perf_session *self,
 	case PERF_RECORD_UNTHROTTLE:
 		return ops->process_unthrottle_event(event, self);
 	default:
-		ops->total_unknown++;
+		self->unknown_events++;
 		return -1;
 	}
 }
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5771ccb3fe03..585937b6f9ee 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -20,6 +20,7 @@ struct perf_session {
 	struct thread		*last_match;
 	struct events_stats	events_stats;
 	unsigned long		event_total[PERF_RECORD_MAX];
+	unsigned long		unknown_events;
 	struct rb_root		hists;
 	u64			sample_type;
 	int			fd;
@@ -40,7 +41,6 @@ struct perf_event_ops {
 	event_op	process_read_event;
 	event_op	process_throttle_event;
 	event_op	process_unthrottle_event;
-	unsigned long	total_unknown;
 	bool		full_paths;
 };
 

From f7d87444e6ee6f4a19634e5412664c1c529a2370 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:04 -0200
Subject: [PATCH 007/640] perf session: Move full_paths config to symbol_conf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now perf_event_ops has just that, event handlers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-8-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-diff.c   | 2 +-
 tools/perf/builtin-report.c | 2 +-
 tools/perf/util/session.c   | 2 +-
 tools/perf/util/session.h   | 1 -
 tools/perf/util/symbol.h    | 3 ++-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index bd71b8ceafb7..e164b3d45cd4 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -204,7 +204,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
-	OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+	OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
 		    "Don't shorten the pathnames taking into account the cwd"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 08259184cedb..f695084910c0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -321,7 +321,7 @@ static const struct option options[] = {
 		   "pretty printing style key: normal raw"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
 		   "sort by key(s): pid, comm, dso, symbol, parent"),
-	OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+	OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
 		    "Don't shorten the pathnames taking into account the cwd"),
 	OPT_STRING('p', "parent", &parent_pattern, "regex",
 		   "regex filter to identify parent, see: '--sort parent'"),
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4ca427f73994..4f2eeb584da8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -306,7 +306,7 @@ int perf_session__process_events(struct perf_session *self,
 
 	head = self->header.data_offset;
 
-	if (!ops->full_paths) {
+	if (!symbol_conf.full_paths) {
 		char bf[PATH_MAX];
 
 		if (getcwd(bf, sizeof(bf)) == NULL) {
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 585937b6f9ee..2ff77fea06ef 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -41,7 +41,6 @@ struct perf_event_ops {
 	event_op	process_read_event;
 	event_op	process_throttle_event;
 	event_op	process_unthrottle_event;
-	bool		full_paths;
 };
 
 struct perf_session *perf_session__new(const char *filename, int mode, bool force);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8aded2356f79..9eabd60f819d 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -58,7 +58,8 @@ struct symbol_conf {
 			sort_by_name,
 			show_nr_samples,
 			use_callchain,
-			exclude_other;
+			exclude_other,
+			full_paths;
 	const char	*vmlinux_name,
 			*field_sep;
 	char            *dso_list_str,

From 55aa640f54280da25046acd2075842d464f451e6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:05 -0200
Subject: [PATCH 008/640] perf session: Remove redundant prefix & suffix from
 perf_event_ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since now all that we have are perf event handlers, leave just
the name of the event.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-9-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c  |  8 ++---
 tools/perf/builtin-diff.c      | 12 ++++----
 tools/perf/builtin-kmem.c      |  4 +--
 tools/perf/builtin-report.c    | 14 ++++-----
 tools/perf/builtin-sched.c     |  6 ++--
 tools/perf/builtin-timechart.c |  8 ++---
 tools/perf/builtin-trace.c     |  4 +--
 tools/perf/util/session.c      | 54 +++++++++++++++++-----------------
 tools/perf/util/session.h      | 18 ++++++------
 9 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 593ff25006de..117bbae844bf 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -451,10 +451,10 @@ static void perf_session__find_annotations(struct perf_session *self)
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event	= process_sample_event,
-	.process_mmap_event	= event__process_mmap,
-	.process_comm_event	= event__process_comm,
-	.process_fork_event	= event__process_task,
+	.sample	= process_sample_event,
+	.mmap	= event__process_mmap,
+	.comm	= event__process_comm,
+	.fork	= event__process_task,
 };
 
 static int __cmd_annotate(void)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index e164b3d45cd4..1cbecaf029fa 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -66,12 +66,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event = diff__process_sample_event,
-	.process_mmap_event   = event__process_mmap,
-	.process_comm_event   = event__process_comm,
-	.process_exit_event   = event__process_task,
-	.process_fork_event   = event__process_task,
-	.process_lost_event   = event__process_lost,
+	.sample	= diff__process_sample_event,
+	.mmap	= event__process_mmap,
+	.comm	= event__process_comm,
+	.exit	= event__process_task,
+	.fork	= event__process_task,
+	.lost	= event__process_lost,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 73b065022e27..4c06828fe39d 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -343,8 +343,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event	= process_sample_event,
-	.process_comm_event	= event__process_comm,
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f695084910c0..508934b0140a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -184,13 +184,13 @@ static int perf_session__setup_sample_type(struct perf_session *self)
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event	= process_sample_event,
-	.process_mmap_event	= event__process_mmap,
-	.process_comm_event	= event__process_comm,
-	.process_exit_event	= event__process_task,
-	.process_fork_event	= event__process_task,
-	.process_lost_event	= event__process_lost,
-	.process_read_event	= process_read_event,
+	.sample	= process_sample_event,
+	.mmap	= event__process_mmap,
+	.comm	= event__process_comm,
+	.exit	= event__process_task,
+	.fork	= event__process_task,
+	.lost	= event__process_lost,
+	.read	= process_read_event,
 };
 
 static int __cmd_report(void)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e862e71f4e68..702322f8fec1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1654,9 +1654,9 @@ static int process_lost_event(event_t *event __used,
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event	= process_sample_event,
-	.process_comm_event	= event__process_comm,
-	.process_lost_event	= process_lost_event,
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
+	.lost	= process_lost_event,
 };
 
 static int read_events(void)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 825283794985..5b68d81d93a1 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1030,10 +1030,10 @@ static void process_samples(struct perf_session *session)
 }
 
 static struct perf_event_ops event_ops = {
-	.process_comm_event	= process_comm_event,
-	.process_fork_event	= process_fork_event,
-	.process_exit_event	= process_exit_event,
-	.process_sample_event	= queue_sample_event,
+	.comm	= process_comm_event,
+	.fork	= process_fork_event,
+	.exit	= process_exit_event,
+	.sample	= queue_sample_event,
 };
 
 static int __cmd_timechart(void)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e94f34631585..1831434aa938 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -104,8 +104,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 }
 
 static struct perf_event_ops event_ops = {
-	.process_sample_event	= process_sample_event,
-	.process_comm_event	= event__process_comm,
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
 };
 
 static int __cmd_trace(struct perf_session *session)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4f2eeb584da8..7f0537d1add8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -161,24 +161,24 @@ static int process_event_stub(event_t *event __used,
 
 static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 {
-	if (handler->process_sample_event == NULL)
-		handler->process_sample_event = process_event_stub;
-	if (handler->process_mmap_event == NULL)
-		handler->process_mmap_event = process_event_stub;
-	if (handler->process_comm_event == NULL)
-		handler->process_comm_event = process_event_stub;
-	if (handler->process_fork_event == NULL)
-		handler->process_fork_event = process_event_stub;
-	if (handler->process_exit_event == NULL)
-		handler->process_exit_event = process_event_stub;
-	if (handler->process_lost_event == NULL)
-		handler->process_lost_event = process_event_stub;
-	if (handler->process_read_event == NULL)
-		handler->process_read_event = process_event_stub;
-	if (handler->process_throttle_event == NULL)
-		handler->process_throttle_event = process_event_stub;
-	if (handler->process_unthrottle_event == NULL)
-		handler->process_unthrottle_event = process_event_stub;
+	if (handler->sample == NULL)
+		handler->sample = process_event_stub;
+	if (handler->mmap == NULL)
+		handler->mmap = process_event_stub;
+	if (handler->comm == NULL)
+		handler->comm = process_event_stub;
+	if (handler->fork == NULL)
+		handler->fork = process_event_stub;
+	if (handler->exit == NULL)
+		handler->exit = process_event_stub;
+	if (handler->lost == NULL)
+		handler->lost = process_event_stub;
+	if (handler->read == NULL)
+		handler->read = process_event_stub;
+	if (handler->throttle == NULL)
+		handler->throttle = process_event_stub;
+	if (handler->unthrottle == NULL)
+		handler->unthrottle = process_event_stub;
 }
 
 static const char *event__name[] = {
@@ -222,23 +222,23 @@ static int perf_session__process_event(struct perf_session *self,
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-		return ops->process_sample_event(event, self);
+		return ops->sample(event, self);
 	case PERF_RECORD_MMAP:
-		return ops->process_mmap_event(event, self);
+		return ops->mmap(event, self);
 	case PERF_RECORD_COMM:
-		return ops->process_comm_event(event, self);
+		return ops->comm(event, self);
 	case PERF_RECORD_FORK:
-		return ops->process_fork_event(event, self);
+		return ops->fork(event, self);
 	case PERF_RECORD_EXIT:
-		return ops->process_exit_event(event, self);
+		return ops->exit(event, self);
 	case PERF_RECORD_LOST:
-		return ops->process_lost_event(event, self);
+		return ops->lost(event, self);
 	case PERF_RECORD_READ:
-		return ops->process_read_event(event, self);
+		return ops->read(event, self);
 	case PERF_RECORD_THROTTLE:
-		return ops->process_throttle_event(event, self);
+		return ops->throttle(event, self);
 	case PERF_RECORD_UNTHROTTLE:
-		return ops->process_unthrottle_event(event, self);
+		return ops->unthrottle(event, self);
 	default:
 		self->unknown_events++;
 		return -1;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 2ff77fea06ef..77c5ee2993c2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -32,15 +32,15 @@ struct perf_session {
 typedef int (*event_op)(event_t *self, struct perf_session *session);
 
 struct perf_event_ops {
-	event_op	process_sample_event;
-	event_op	process_mmap_event;
-	event_op	process_comm_event;
-	event_op	process_fork_event;
-	event_op	process_exit_event;
-	event_op	process_lost_event;
-	event_op	process_read_event;
-	event_op	process_throttle_event;
-	event_op	process_unthrottle_event;
+	event_op sample,
+		 mmap,
+		 comm,
+		 fork,
+		 exit,
+		 lost,
+		 read,
+		 throttle,
+		 unthrottle;
 };
 
 struct perf_session *perf_session__new(const char *filename, int mode, bool force);

From 4cf40131a5cf4918e83b3756e58a1fc9e984f8ef Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 27 Dec 2009 21:37:06 -0200
Subject: [PATCH 009/640] perf record: Introduce a symtab cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now a cache will be created in a ~/.debug debuginfo like
hierarchy, so that at the end of a 'perf record' session all the
binaries (with build-ids) involved get collected and indexed by
their build-ids, so that perf report can find them.

This is interesting when developing software where you want to
do a 'perf diff' with the previous build and opens avenues for
lots more interesting tools, like a 'perf diff --graph' that
takes more than two binaries into account.

Tunables for collecting just the symtabs can be added if one
doesn't want to have the full binary, but having the full binary
allows things like 'perf rerecord' or other tools that can
re-run the tests by having access to the exact binary in some
perf.data file, so it may well be interesting to keep the full
binary there.

Space consumption is minimised by trying to use hard links, a
'perf cache' tool to manage the space used, a la ccache is
required to purge older entries.

With this in place it will be possible also to introduce new
commands, 'perf archive' and 'perf restore' (or some more
suitable and future proof names) to create a cpio/tar file with
the perf data and the files in the cache that _had_ perf hits of
interest.

There are more aspects to polish, like finding the right vmlinux
file to cache, etc, but this is enough for a first step.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1261957026-15580-10-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile      |  1 +
 tools/perf/util/header.c | 82 +++++++++++++++++++++++++++++++++++++---
 tools/perf/util/symbol.c | 17 +++++++--
 tools/perf/util/symbol.h |  2 +
 tools/perf/util/util.c   | 69 +++++++++++++++++++++++++++++++++
 tools/perf/util/util.h   |  3 ++
 6 files changed, 165 insertions(+), 9 deletions(-)
 create mode 100644 tools/perf/util/util.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index fafea0b6f323..7c846424aebf 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -425,6 +425,7 @@ LIB_OBJS += util/svghelper.o
 LIB_OBJS += util/sort.o
 LIB_OBJS += util/hist.o
 LIB_OBJS += util/probe-event.o
+LIB_OBJS += util/util.o
 
 BUILTIN_OBJS += builtin-annotate.o
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8a0bca55106f..df237c3a041b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -169,20 +169,23 @@ static int do_write(int fd, const void *buf, size_t size)
 	return 0;
 }
 
+#define dsos__for_each_with_build_id(pos, head)	\
+	list_for_each_entry(pos, head, node)	\
+		if (!pos->has_build_id)		\
+			continue;		\
+		else
+
 static int __dsos__write_buildid_table(struct list_head *head, int fd)
 {
 #define NAME_ALIGN	64
 	struct dso *pos;
 	static const char zero_buf[NAME_ALIGN];
 
-	list_for_each_entry(pos, head, node) {
+	dsos__for_each_with_build_id(pos, head) {
 		int err;
 		struct build_id_event b;
-		size_t len;
+		size_t len = pos->long_name_len + 1;
 
-		if (!pos->has_build_id)
-			continue;
-		len = pos->long_name_len + 1;
 		len = ALIGN(len, NAME_ALIGN);
 		memset(&b, 0, sizeof(b));
 		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
@@ -209,6 +212,74 @@ static int dsos__write_buildid_table(int fd)
 	return err;
 }
 
+static int dso__cache_build_id(struct dso *self, const char *debugdir)
+{
+	const size_t size = PATH_MAX;
+	char *filename = malloc(size),
+	     *linkname = malloc(size), *targetname, *sbuild_id;
+	int len, err = -1;
+
+	if (filename == NULL || linkname == NULL)
+		goto out_free;
+
+	len = snprintf(filename, size, "%s%s", debugdir, self->long_name);
+	if (mkdir_p(filename, 0755))
+		goto out_free;
+
+	len += snprintf(filename + len, sizeof(filename) - len, "/");
+	sbuild_id = filename + len;
+	build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
+
+	if (access(filename, F_OK) && link(self->long_name, filename) &&
+	    copyfile(self->long_name, filename))
+		goto out_free;
+
+	len = snprintf(linkname, size, "%s/.build-id/%.2s",
+		       debugdir, sbuild_id);
+
+	if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+		goto out_free;
+
+	snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
+	targetname = filename + strlen(debugdir) - 5;
+	memcpy(targetname, "../..", 5);
+
+	if (symlink(targetname, linkname) == 0)
+		err = 0;
+out_free:
+	free(filename);
+	free(linkname);
+	return err;
+}
+
+static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+{
+	struct dso *pos;
+	int err = 0;
+
+	dsos__for_each_with_build_id(pos, head)
+		if (dso__cache_build_id(pos, debugdir))
+			err = -1;
+
+	return err;
+}
+
+static int dsos__cache_build_ids(void)
+{
+	int err_kernel, err_user;
+	char debugdir[PATH_MAX];
+
+	snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
+		 DEBUG_CACHE_DIR);
+
+	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+		return -1;
+
+	err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir);
+	err_user   = __dsos__cache_build_ids(&dsos__user, debugdir);
+	return err_kernel || err_user ? -1 : 0;
+}
+
 static int perf_header__adds_write(struct perf_header *self, int fd)
 {
 	int nr_sections;
@@ -258,6 +329,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 			goto out_free;
 		}
 		buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
+		dsos__cache_build_ids();
 	}
 
 	lseek(fd, sec_start, SEEK_SET);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab92763edb03..79ca6a099f96 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,7 @@
 enum dso_origin {
 	DSO__ORIG_KERNEL = 0,
 	DSO__ORIG_JAVA_JIT,
+	DSO__ORIG_BUILD_ID_CACHE,
 	DSO__ORIG_FEDORA,
 	DSO__ORIG_UBUNTU,
 	DSO__ORIG_BUILDID,
@@ -1191,6 +1192,7 @@ char dso__symtab_origin(const struct dso *self)
 	static const char origin[] = {
 		[DSO__ORIG_KERNEL] =   'k',
 		[DSO__ORIG_JAVA_JIT] = 'j',
+		[DSO__ORIG_BUILD_ID_CACHE] = 'B',
 		[DSO__ORIG_FEDORA] =   'f',
 		[DSO__ORIG_UBUNTU] =   'u',
 		[DSO__ORIG_BUILDID] =  'b',
@@ -1209,6 +1211,7 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 	int size = PATH_MAX;
 	char *name;
 	u8 build_id[BUILD_ID_SIZE];
+	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
 	int ret = -1;
 	int fd;
 
@@ -1230,8 +1233,16 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 		return ret;
 	}
 
-	self->origin = DSO__ORIG_FEDORA - 1;
+	self->origin = DSO__ORIG_BUILD_ID_CACHE;
 
+	if (self->has_build_id) {
+		build_id__sprintf(self->build_id, sizeof(self->build_id),
+				  build_id_hex);
+		snprintf(name, size, "%s/%s/.build-id/%.2s/%s",
+			 getenv("HOME"), DEBUG_CACHE_DIR,
+			 build_id_hex, build_id_hex + 2);
+		goto open_file;
+	}
 more:
 	do {
 		self->origin++;
@@ -1247,8 +1258,6 @@ more:
 		case DSO__ORIG_BUILDID:
 			if (filename__read_build_id(self->long_name, build_id,
 						    sizeof(build_id))) {
-				char build_id_hex[BUILD_ID_SIZE * 2 + 1];
-
 				build_id__sprintf(build_id, sizeof(build_id),
 						  build_id_hex);
 				snprintf(name, size,
@@ -1276,7 +1285,7 @@ compare_build_id:
 			if (!dso__build_id_equal(self, build_id))
 				goto more;
 		}
-
+open_file:
 		fd = open(name, O_RDONLY);
 	} while (fd < 0);
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 9eabd60f819d..f27e158943e9 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -8,6 +8,8 @@
 #include <linux/rbtree.h>
 #include "event.h"
 
+#define DEBUG_CACHE_DIR ".debug"
+
 #ifdef HAVE_CPLUS_DEMANGLE
 extern char *cplus_demangle(const char *, int);
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000000..f3c0798a5e78
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,69 @@
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include "util.h"
+
+int mkdir_p(char *path, mode_t mode)
+{
+	struct stat st;
+	int err;
+	char *d = path;
+
+	if (*d != '/')
+		return -1;
+
+	if (stat(path, &st) == 0)
+		return 0;
+
+	while (*++d == '/');
+
+	while ((d = strchr(d, '/'))) {
+		*d = '\0';
+		err = stat(path, &st) && mkdir(path, mode);
+		*d++ = '/';
+		if (err)
+			return -1;
+		while (*d == '/')
+			++d;
+	}
+	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+int copyfile(const char *from, const char *to)
+{
+	int fromfd, tofd;
+	struct stat st;
+	void *addr;
+	int err = -1;
+
+	if (stat(from, &st))
+		goto out;
+
+	fromfd = open(from, O_RDONLY);
+	if (fromfd < 0)
+		goto out;
+
+	tofd = creat(to, 0755);
+	if (tofd < 0)
+		goto out_close_from;
+
+	addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
+	if (addr == MAP_FAILED)
+		goto out_close_to;
+
+	if (write(tofd, addr, st.st_size) == st.st_size)
+		err = 0;
+
+	munmap(addr, st.st_size);
+out_close_to:
+	close(tofd);
+	if (err)
+		unlink(to);
+out_close_from:
+	close(fromfd);
+out:
+	return err;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c673d8825883..0f5b2a6f1080 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -403,4 +403,7 @@ void git_qsort(void *base, size_t nmemb, size_t size,
 #endif
 #endif
 
+int mkdir_p(char *path, mode_t mode);
+int copyfile(const char *from, const char *to);
+
 #endif

From 49f474331e563a6ecf3b1e87ec27ec5482b3e4f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sun, 27 Dec 2009 11:51:52 +0100
Subject: [PATCH 010/640] perf events: Remove arg from perf sched hooks

Since we only ever schedule the local cpu, there is no need to pass the
cpu number to the perf sched hooks.

This micro-optimizes things a bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 12 ++++++------
 kernel/perf_event.c        | 27 ++++++++++++++-------------
 kernel/sched.c             |  6 +++---
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f75eea..a494e7501292 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -746,10 +746,10 @@ extern int perf_max_events;
 
 extern const struct pmu *hw_perf_event_init(struct perf_event *event);
 
-extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_in(struct task_struct *task);
 extern void perf_event_task_sched_out(struct task_struct *task,
-					struct task_struct *next, int cpu);
-extern void perf_event_task_tick(struct task_struct *task, int cpu);
+					struct task_struct *next);
+extern void perf_event_task_tick(struct task_struct *task);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -870,12 +870,12 @@ extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 #else
 static inline void
-perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
+perf_event_task_sched_in(struct task_struct *task)			{ }
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
-			    struct task_struct *next, int cpu)		{ }
+			    struct task_struct *next)			{ }
 static inline void
-perf_event_task_tick(struct task_struct *task, int cpu)			{ }
+perf_event_task_tick(struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 03cc061398d1..099bd662daa6 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1170,9 +1170,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
  * not restart the event.
  */
 void perf_event_task_sched_out(struct task_struct *task,
-				 struct task_struct *next, int cpu)
+				 struct task_struct *next)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 	struct perf_event_context *next_ctx;
 	struct perf_event_context *parent;
@@ -1252,8 +1252,9 @@ static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
 
 static void
 __perf_event_sched_in(struct perf_event_context *ctx,
-			struct perf_cpu_context *cpuctx, int cpu)
+			struct perf_cpu_context *cpuctx)
 {
+	int cpu = smp_processor_id();
 	struct perf_event *event;
 	int can_add_hw = 1;
 
@@ -1326,24 +1327,24 @@ __perf_event_sched_in(struct perf_event_context *ctx,
  * accessing the event control register. If a NMI hits, then it will
  * keep the event running.
  */
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 
 	if (likely(!ctx))
 		return;
 	if (cpuctx->task_ctx == ctx)
 		return;
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+	__perf_event_sched_in(ctx, cpuctx);
 	cpuctx->task_ctx = ctx;
 }
 
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx)
 {
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+	__perf_event_sched_in(ctx, cpuctx);
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -1461,7 +1462,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	raw_spin_unlock(&ctx->lock);
 }
 
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
@@ -1469,7 +1470,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	if (!atomic_read(&nr_events))
 		return;
 
-	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 	ctx = curr->perf_event_ctxp;
 
 	perf_ctx_adjust_freq(&cpuctx->ctx);
@@ -1484,9 +1485,9 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	if (ctx)
 		rotate_ctx(ctx);
 
-	perf_event_cpu_sched_in(cpuctx, cpu);
+	perf_event_cpu_sched_in(cpuctx);
 	if (ctx)
-		perf_event_task_sched_in(curr, cpu);
+		perf_event_task_sched_in(curr);
 }
 
 /*
@@ -1527,7 +1528,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_unlock(&ctx->lock);
 
-	perf_event_task_sched_in(task, smp_processor_id());
+	perf_event_task_sched_in(task);
  out:
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 18cceeecce35..d6527ac0f6e7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2752,7 +2752,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-	perf_event_task_sched_in(current, cpu_of(rq));
+	perf_event_task_sched_in(current);
 	finish_lock_switch(rq, prev);
 
 	fire_sched_in_preempt_notifiers(current);
@@ -5266,7 +5266,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	raw_spin_unlock(&rq->lock);
 
-	perf_event_task_tick(curr, cpu);
+	perf_event_task_tick(curr);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
@@ -5480,7 +5480,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_event_task_sched_out(prev, next, cpu);
+		perf_event_task_sched_out(prev, next);
 
 		rq->nr_switches++;
 		rq->curr = next;

From fd2a50a0240f5f5b59070474eabd83a85720a406 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Date: Thu, 24 Dec 2009 01:54:47 +0000
Subject: [PATCH 011/640] x86, perfctr: Remove unused func
 avail_to_resrv_perfctr_nmi()

avail_to_resrv_perfctr_nmi() is neither EXPORT'd, nor used in
the file. So remove it.

Signed-off-by: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: oprofile-list@lists.sf.net
LKML-Reference: <20091224015441.6005.4408.sendpatchset@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/nmi.h             |  1 -
 arch/x86/kernel/cpu/perfctr-watchdog.c | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-extern int avail_to_resrv_perfctr_nmi(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..74f4e85a5727 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 
 	return !test_bit(counter, perfctr_nmi_owner);
 }
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
-	unsigned int counter;
-
-	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return !test_bit(counter, perfctr_nmi_owner);
-}
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
 
 int reserve_perfctr_nmi(unsigned int msr)

From 659d8cfbb225f1fa5a4f8671a847ef3ab5a89660 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Sat, 19 Dec 2009 16:40:28 -0500
Subject: [PATCH 012/640] perf tools: Do a few more directory handling
 optimizations

A few more optimizations for perf when dealing with directories.

Some of them significantly cut down the work which has to be
done. d_type should always be set; otherwise fix the kernel
code. And there are functions available to parse fstab-like
files, so use them.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: a.p.zijlstra@chello.nl
Cc: acme@redhat.com
Cc: eranian@google.com
Cc: fweisbec@gmail.com
Cc: lizf@cn.fujitsu.com
Cc: paulus@samba.org
Cc: xiaoguangrong@cn.fujitsu.com
LKML-Reference: <200912192140.nBJLeSfA028905@hs20-bc2-1.build.redhat.com>
[ v2: two small stylistic fixlets ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c          | 17 ++++------
 tools/perf/util/trace-event-info.c | 50 ++++++++++++++----------------
 2 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4c06828fe39d..05dc5a735039 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -92,23 +92,18 @@ static void setup_cpunode_map(void)
 	if (!dir1)
 		return;
 
-	while (true) {
-		dent1 = readdir(dir1);
-		if (!dent1)
-			break;
-
-		if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+	while ((dent1 = readdir(dir1)) != NULL) {
+		if (dent1->d_type != DT_DIR ||
+		    sscanf(dent1->d_name, "node%u", &mem) < 1)
 			continue;
 
 		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
 		dir2 = opendir(buf);
 		if (!dir2)
 			continue;
-		while (true) {
-			dent2 = readdir(dir2);
-			if (!dent2)
-				break;
-			if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+		while ((dent2 = readdir(dir2)) != NULL) {
+			if (dent2->d_type != DT_LNK ||
+			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
 				continue;
 			cpunode_map[cpu] = mem;
 		}
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index cace35595530..dfef238ce158 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -20,6 +20,7 @@
  */
 #define _GNU_SOURCE
 #include <dirent.h>
+#include <mntent.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -103,29 +104,29 @@ static const char *find_debugfs(void)
 {
 	static char debugfs[MAX_PATH+1];
 	static int debugfs_found;
-	char type[100];
 	FILE *fp;
+	struct mntent *m;
 
 	if (debugfs_found)
 		return debugfs;
 
-	if ((fp = fopen("/proc/mounts","r")) == NULL)
+	fp = setmntent("/proc/mounts", "r");
+	if (!fp)
 		die("Can't open /proc/mounts for read");
 
-	while (fscanf(fp, "%*s %"
-		      STR(MAX_PATH)
-		      "s %99s %*s %*d %*d\n",
-		      debugfs, type) == 2) {
-		if (strcmp(type, "debugfs") == 0)
+	while ((m = getmntent(fp)) != NULL) {
+		if (strcmp(m->mnt_type, "debugfs") == 0) {
+			strcpy(debugfs, m->mnt_dir);
+			debugfs_found = 1;
 			break;
+		}
 	}
-	fclose(fp);
 
-	if (strcmp(type, "debugfs") != 0)
+	endmntent(fp);
+
+	if (!debugfs_found)
 		die("debugfs not mounted, please mount");
 
-	debugfs_found = 1;
-
 	return debugfs;
 }
 
@@ -317,7 +318,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
 		die("can't read directory '%s'", sys);
 
 	while ((dent = readdir(dir))) {
-		if (strcmp(dent->d_name, ".") == 0 ||
+		if (dent->d_type != DT_DIR ||
+		    strcmp(dent->d_name, ".") == 0 ||
 		    strcmp(dent->d_name, "..") == 0 ||
 		    !name_in_tp_list(dent->d_name, tps))
 			continue;
@@ -334,7 +336,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
 
 	rewinddir(dir);
 	while ((dent = readdir(dir))) {
-		if (strcmp(dent->d_name, ".") == 0 ||
+		if (dent->d_type != DT_DIR ||
+		    strcmp(dent->d_name, ".") == 0 ||
 		    strcmp(dent->d_name, "..") == 0 ||
 		    !name_in_tp_list(dent->d_name, tps))
 			continue;
@@ -394,26 +397,21 @@ static void read_event_files(struct tracepoint_path *tps)
 		die("can't read directory '%s'", path);
 
 	while ((dent = readdir(dir))) {
-		if (strcmp(dent->d_name, ".") == 0 ||
+		if (dent->d_type != DT_DIR ||
+		    strcmp(dent->d_name, ".") == 0 ||
 		    strcmp(dent->d_name, "..") == 0 ||
 		    strcmp(dent->d_name, "ftrace") == 0 ||
 		    !system_in_tp_list(dent->d_name, tps))
 			continue;
-		sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2);
-		sprintf(sys, "%s/%s", path, dent->d_name);
-		ret = stat(sys, &st);
-		free(sys);
-		if (ret < 0)
-			continue;
-		if (S_ISDIR(st.st_mode))
-			count++;
+		count++;
 	}
 
 	write_or_die(&count, 4);
 
 	rewinddir(dir);
 	while ((dent = readdir(dir))) {
-		if (strcmp(dent->d_name, ".") == 0 ||
+		if (dent->d_type != DT_DIR ||
+		    strcmp(dent->d_name, ".") == 0 ||
 		    strcmp(dent->d_name, "..") == 0 ||
 		    strcmp(dent->d_name, "ftrace") == 0 ||
 		    !system_in_tp_list(dent->d_name, tps))
@@ -422,10 +420,8 @@ static void read_event_files(struct tracepoint_path *tps)
 		sprintf(sys, "%s/%s", path, dent->d_name);
 		ret = stat(sys, &st);
 		if (ret >= 0) {
-			if (S_ISDIR(st.st_mode)) {
-				write_or_die(dent->d_name, strlen(dent->d_name) + 1);
-				copy_event_system(sys, tps);
-			}
+			write_or_die(dent->d_name, strlen(dent->d_name) + 1);
+			copy_event_system(sys, tps);
 		}
 		free(sys);
 	}

From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 21 Dec 2009 14:27:35 +0800
Subject: [PATCH 013/640] perf events: Remove CONFIG_EVENT_PROFILE

Quoted from Ingo:

| This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE -
| it's an unnecessary Kconfig complication. If both PERF_EVENTS and
| EVENT_TRACING is enabled we should expose generic tracepoints.
|
| Nor is it limited to event 'profiling', so it has become a misnomer as
| well.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  2 +-
 include/linux/perf_event.h         |  2 +-
 include/linux/syscalls.h           |  4 ++--
 include/trace/ftrace.h             | 12 ++++++------
 include/trace/syscall.h            |  4 ++--
 init/Kconfig                       | 13 -------------
 kernel/perf_event.c                |  4 ++--
 kernel/trace/Makefile              |  4 +++-
 kernel/trace/trace_events_filter.c |  4 ++--
 kernel/trace/trace_kprobe.c        | 14 +++++++-------
 kernel/trace/trace_syscalls.c      |  5 ++---
 11 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2233c98d80df..0a09e758c7d3 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -188,7 +188,7 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 struct perf_event;
 extern int ftrace_profile_enable(int event_id);
 extern void ftrace_profile_disable(int event_id);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a494e7501292..9a1d276db754 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -658,7 +658,7 @@ struct perf_event {
 
 	perf_overflow_handler_t		overflow_handler;
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
 	struct event_filter		*filter;
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e90d6f6..b7c7fcf7790b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,7 +99,7 @@ struct perf_event_attr;
 #define __SC_TEST5(t5, a5, ...)	__SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
 	.profile_enable = prof_sysenter_enable,				       \
@@ -113,7 +113,7 @@ struct perf_event_attr;
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
 #define TRACE_SYS_EXIT_PROFILE(sname)
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #define __SC_STR_ADECL1(t, a)		#a
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 73523151a731..2fdd36df41f6 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -498,7 +498,7 @@ static inline int ftrace_get_offsets_##call(				\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 /*
  * Generate the functions needed for tracepoint perf_event support.
@@ -541,7 +541,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 /*
  * Stage 4 of the trace events.
@@ -626,7 +626,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  */
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #define _TRACE_PROFILE_INIT(call)					\
 	.profile_enable = ftrace_profile_enable_##call,			\
@@ -634,7 +634,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 
 #else
 #define _TRACE_PROFILE_INIT(call)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 #undef __entry
 #define __entry entry
@@ -834,7 +834,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  * }
  */
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #undef __perf_addr
 #define __perf_addr(a) __addr = (a)
@@ -926,7 +926,7 @@ static void ftrace_profile_##call(proto)			\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
 
 #undef _TRACE_PROFILE_INIT
 
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 961fda3556bb..3d463dcef298 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -49,12 +49,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
 enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
-#ifdef CONFIG_EVENT_PROFILE
+
+#ifdef CONFIG_PERF_EVENTS
 int prof_sysenter_enable(struct ftrace_event_call *call);
 void prof_sysenter_disable(struct ftrace_event_call *call);
 int prof_sysexit_enable(struct ftrace_event_call *call);
 void prof_sysexit_disable(struct ftrace_event_call *call);
-
 #endif
 
 #endif /* _TRACE_SYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..06dab27c18d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -966,19 +966,6 @@ config PERF_EVENTS
 
 	  Say Y if unsure.
 
-config EVENT_PROFILE
-	bool "Tracepoint profiling sources"
-	depends on PERF_EVENTS && EVENT_TRACING
-	default y
-	help
-	 Allow the use of tracepoints as software performance events.
-
-	 When this is enabled, you can create perf events based on
-	 tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
-	 found in debugfs://tracing/events/*/*/id. (The -e/--events
-	 option to the perf tool can parse and interpret symbolic
-	 tracepoints, in the subsystem:tracepoint_name format.)
-
 config PERF_COUNTERS
 	bool "Kernel performance counters (old config option)"
 	depends on HAVE_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 099bd662daa6..5b987b4a98a8 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4177,7 +4177,7 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_event_read,
 };
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
@@ -4282,7 +4282,7 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 static void bp_perf_event_destroy(struct perf_event *event)
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..74563d7e102e 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1360,7 +1360,7 @@ out_unlock:
 	return err;
 }
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 void ftrace_profile_free_filter(struct perf_event *event)
 {
@@ -1428,5 +1428,5 @@ out_unlock:
 	return err;
 }
 
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 375f81a568dc..75d75dec226a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1249,7 +1249,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
 					 ", REC->" FIELD_STRING_RETIP);
 }
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
 static __kprobes int kprobe_profile_func(struct kprobe *kp,
@@ -1407,7 +1407,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
 			disable_kprobe(&tp->rp.kp);
 	}
 }
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif	/* CONFIG_PERF_EVENTS */
 
 
 static __kprobes
@@ -1417,10 +1417,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 
 	if (tp->flags & TP_FLAG_TRACE)
 		kprobe_trace_func(kp, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
 		kprobe_profile_func(kp, regs);
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
 
@@ -1431,10 +1431,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 
 	if (tp->flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(ri, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
 		kretprobe_profile_func(ri, regs);
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
 
@@ -1463,7 +1463,7 @@ static int register_probe_event(struct trace_probe *tp)
 	call->regfunc = probe_event_enable;
 	call->unregfunc = probe_event_disable;
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	call->profile_enable = probe_profile_enable;
 	call->profile_disable = probe_profile_disable;
 #endif
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..f694f66d75b0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void)
 }
 core_initcall(init_ftrace_syscalls);
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-#endif
-
+#endif /* CONFIG_PERF_EVENTS */
 

From 29c52aa2300173dd45df04dae1f5acc81a2c93b1 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 28 Dec 2009 16:47:12 +0800
Subject: [PATCH 014/640] perf tools: Mount debugfs automatically

Mount debugfs filesystem under '/sys/kernel/debug', if it's not
mounted.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <4B387090.7080407@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/perf.c         |  2 +-
 tools/perf/util/debugfs.c | 16 +++++++---------
 tools/perf/util/debugfs.h |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 873e55fab375..fc89005c3e51 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -388,7 +388,7 @@ static int run_argv(int *argcp, const char ***argv)
 /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
 static void get_debugfs_mntpt(void)
 {
-	const char *path = debugfs_find_mountpoint();
+	const char *path = debugfs_mount(NULL);
 
 	if (path)
 		strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index 06b73ee02c49..1f805fde5fd4 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -106,16 +106,14 @@ int debugfs_valid_entry(const char *path)
 	return 0;
 }
 
-/* mount the debugfs somewhere */
+/* mount the debugfs somewhere if it's not mounted */
 
-int debugfs_mount(const char *mountpoint)
+char *debugfs_mount(const char *mountpoint)
 {
-	char mountcmd[128];
-
 	/* see if it's already mounted */
 	if (debugfs_find_mountpoint()) {
 		debugfs_premounted = 1;
-		return 0;
+		return debugfs_mountpoint;
 	}
 
 	/* if not mounted and no argument */
@@ -127,13 +125,13 @@ int debugfs_mount(const char *mountpoint)
 			mountpoint = "/sys/kernel/debug";
 	}
 
+	if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
+		return NULL;
+
 	/* save the mountpoint */
 	strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
 
-	/* mount it */
-	snprintf(mountcmd, sizeof(mountcmd),
-		 "/bin/mount -t debugfs debugfs %s", mountpoint);
-	return system(mountcmd);
+	return debugfs_mountpoint;
 }
 
 /* umount the debugfs */
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 3cd14f9ae784..83a02879745f 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -15,7 +15,7 @@
 extern const char *debugfs_find_mountpoint(void);
 extern int debugfs_valid_mountpoint(const char *debugfs);
 extern int debugfs_valid_entry(const char *path);
-extern int debugfs_mount(const char *mountpoint);
+extern char *debugfs_mount(const char *mountpoint);
 extern int debugfs_umount(void);
 extern int debugfs_write(const char *entry, const char *value);
 extern int debugfs_read(const char *entry, char *buffer, size_t size);

From 61be3e59ba7a6dbd39f92fd1f107285a0caeb008 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 28 Dec 2009 16:48:30 +0800
Subject: [PATCH 015/640] perf trace: Clean up find_debugfs()

Remove redundant code for 'perf trace'

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <4B3870DE.7090500@cn.fujitsu.com>
[ v2: resolved conflicts with recent changes ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/debugfs.c          |  1 +
 tools/perf/util/trace-event-info.c | 29 +++++------------------------
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index 1f805fde5fd4..a88fefc0cc0a 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -130,6 +130,7 @@ char *debugfs_mount(const char *mountpoint)
 
 	/* save the mountpoint */
 	strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+	debugfs_found = 1;
 
 	return debugfs_mountpoint;
 }
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index dfef238ce158..535176dc95b6 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -38,6 +38,7 @@
 
 #include "../perf.h"
 #include "trace-event.h"
+#include "debugfs.h"
 
 #define VERSION "0.5"
 
@@ -102,32 +103,12 @@ void *malloc_or_die(unsigned int size)
 
 static const char *find_debugfs(void)
 {
-	static char debugfs[MAX_PATH+1];
-	static int debugfs_found;
-	FILE *fp;
-	struct mntent *m;
+	const char *path = debugfs_mount(NULL);
 
-	if (debugfs_found)
-		return debugfs;
+	if (!path)
+		die("Your kernel not support debugfs filesystem");
 
-	fp = setmntent("/proc/mounts", "r");
-	if (!fp)
-		die("Can't open /proc/mounts for read");
-
-	while ((m = getmntent(fp)) != NULL) {
-		if (strcmp(m->mnt_type, "debugfs") == 0) {
-			strcpy(debugfs, m->mnt_dir);
-			debugfs_found = 1;
-			break;
-		}
-	}
-
-	endmntent(fp);
-
-	if (!debugfs_found)
-		die("debugfs not mounted, please mount");
-
-	return debugfs;
+	return path;
 }
 
 /*

From 9967411e5b324a908e344d6ce66b77bd5d372c3e Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Mon, 28 Dec 2009 16:49:38 +0800
Subject: [PATCH 016/640] perf trace: Fix forgotten close of file/dir

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <4B387122.7090801@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/trace-event-info.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 535176dc95b6..407fd65b6cdb 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -253,6 +253,8 @@ static void read_header_files(void)
 	write_or_die("header_page", 12);
 	write_or_die(&size, 8);
 	check_size = copy_file_fd(fd);
+	close(fd);
+
 	if (size != check_size)
 		die("wrong size for '%s' size=%lld read=%lld",
 		    path, size, check_size);
@@ -271,6 +273,7 @@ static void read_header_files(void)
 	if (size != check_size)
 		die("wrong size for '%s'", path);
 	put_tracing_file(path);
+	close(fd);
 }
 
 static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -337,6 +340,7 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
 
 		free(format);
 	}
+	closedir(dir);
 }
 
 static void read_ftrace_files(struct tracepoint_path *tps)
@@ -407,6 +411,7 @@ static void read_event_files(struct tracepoint_path *tps)
 		free(sys);
 	}
 
+	closedir(dir);
 	put_tracing_file(path);
 }
 

From 754a00aeb26bcc8bf82897538a078bc84a6d95c7 Mon Sep 17 00:00:00 2001
From: Peter Huewe <PeterHuewe@gmx.de>
Date: Fri, 25 Dec 2009 20:08:45 +0100
Subject: [PATCH 017/640] arch/avr32: Fix build failure for avr32 caused by
 typo

This patch fixes a build failure introduced by the patch
  atmel-mci: change use of dma slave interface by Nicolas Ferre
by changing mci_dma_slave to the correct name of mci_dma_data

This should make the avr32 tree build again.

References:
http://kisskb.ellerman.id.au/kisskb/buildresult/1893610/
http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=commitdiff;h=2635d1ba711560d521f6218c585a3e0401f566e1

Patch against Linus' tree.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/mach-at32ap/at32ap700x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 1aa1ea5e9212..7d4ed4291985 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1325,7 +1325,7 @@ struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
 	struct platform_device		*pdev;
-	struct mci_dma_slave		*slave;
+	struct mci_dma_data	        *slave;
 	u32				pioa_mask;
 	u32				piob_mask;
 
@@ -1344,7 +1344,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
-	slave = kzalloc(sizeof(struct mci_dma_slave), GFP_KERNEL);
+	slave = kzalloc(sizeof(struct mci_dma_data), GFP_KERNEL);
 
 	slave->sdata.dma_dev = &dw_dmac0_device.dev;
 	slave->sdata.reg_width = DW_DMA_SLAVE_WIDTH_32BIT;

From cbf8de1620cdb1abb5b0618ff561004f816064fc Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Mon, 28 Dec 2009 12:22:06 +0100
Subject: [PATCH 018/640] avr32: clean up memory allocation in
 at32_add_device_mci

This patch will check if the kzalloc for the MCI DMA struct actually
returns a valid address, and also clean up properly if it fails or the
function fails at a later stage.

This also silences a compiler warning about using the slave variable
uninitialized.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/mach-at32ap/at32ap700x.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 7d4ed4291985..b13d1879e51b 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1345,6 +1345,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 		goto fail;
 
 	slave = kzalloc(sizeof(struct mci_dma_data), GFP_KERNEL);
+	if (!slave)
+		goto fail;
 
 	slave->sdata.dma_dev = &dw_dmac0_device.dev;
 	slave->sdata.reg_width = DW_DMA_SLAVE_WIDTH_32BIT;
@@ -1357,7 +1359,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
-		goto fail;
+		goto fail_free;
 
 	/* CLK line is common to both slots */
 	pioa_mask = 1 << 10;
@@ -1381,7 +1383,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 		/* Slot is unused */
 		break;
 	default:
-		goto fail;
+		goto fail_free;
 	}
 
 	select_peripheral(PIOA, pioa_mask, PERIPH_A, 0);
@@ -1408,7 +1410,7 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 		break;
 	default:
 		if (!data->slot[0].bus_width)
-			goto fail;
+			goto fail_free;
 
 		data->slot[1].bus_width = 0;
 		break;
@@ -1419,9 +1421,10 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 	platform_device_add(pdev);
 	return pdev;
 
+fail_free:
+	kfree(slave);
 fail:
 	data->dma_slave = NULL;
-	kfree(slave);
 	platform_device_put(pdev);
 	return NULL;
 }

From 41bdcb23dab22bf27361c5f2d89fe895d8904915 Mon Sep 17 00:00:00 2001
From: Liming Wang <liming.wang@windriver.com>
Date: Tue, 29 Dec 2009 16:37:07 +0800
Subject: [PATCH 019/640] perf tools: Unify event type description

make event type description to a unified array and
the array index consistent to perf_type_id.

Signed-off-by: Liming Wang <liming.wang@windriver.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1262075829-16257-1-git-send-email-liming.wang@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5bc0fb016b2..dc585a835cab 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -835,11 +835,12 @@ int parse_filter(const struct option *opt __used, const char *str,
 }
 
 static const char * const event_type_descriptors[] = {
-	"",
 	"Hardware event",
 	"Software event",
 	"Tracepoint event",
 	"Hardware cache event",
+	"Raw hardware event descriptor",
+	"Hardware breakpoint",
 };
 
 /*
@@ -872,7 +873,7 @@ static void print_tracepoint_events(void)
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent.d_name, evt_dirent.d_name);
 			printf("  %-42s [%s]\n", evt_path,
-				event_type_descriptors[PERF_TYPE_TRACEPOINT+1]);
+				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
 		}
 		closedir(evt_dir);
 	}
@@ -892,9 +893,7 @@ void print_events(void)
 	printf("List of pre-defined events (to be used in -e):\n");
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
-		type = syms->type + 1;
-		if (type >= ARRAY_SIZE(event_type_descriptors))
-			type = 0;
+		type = syms->type;
 
 		if (type != prev_type)
 			printf("\n");
@@ -919,17 +918,19 @@ void print_events(void)
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
 				printf("  %-42s [%s]\n",
 					event_cache_name(type, op, i),
-					event_type_descriptors[4]);
+					event_type_descriptors[PERF_TYPE_HW_CACHE]);
 			}
 		}
 	}
 
 	printf("\n");
-	printf("  %-42s [raw hardware event descriptor]\n",
-		"rNNN");
+	printf("  %-42s [%s]\n",
+		"rNNN", event_type_descriptors[PERF_TYPE_RAW]);
 	printf("\n");
 
-	printf("  %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+	printf("  %-42s [%s]\n",
+			"mem:<addr>[:access]",
+			event_type_descriptors[PERF_TYPE_BREAKPOINT]);
 	printf("\n");
 
 	print_tracepoint_events();

From 63bbd5e2d539c9290b229c832f62d42aac23db94 Mon Sep 17 00:00:00 2001
From: Liming Wang <liming.wang@windriver.com>
Date: Tue, 29 Dec 2009 16:37:09 +0800
Subject: [PATCH 020/640] perf probe: Change CONFIG_KPROBE_TRACER to
 CONFIG_KPROBE_EVENT

make the config name consistent

Signed-off-by: Liming Wang <liming.wang@windriver.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1262075829-16257-3-git-send-email-liming.wang@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/probe-event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 29465d440043..8e532d9824f0 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -368,7 +368,7 @@ static int open_kprobe_events(int flags, int mode)
 	if (ret < 0) {
 		if (errno == ENOENT)
 			die("kprobe_events file does not exist -"
-			    " please rebuild with CONFIG_KPROBE_TRACER.");
+			    " please rebuild with CONFIG_KPROBE_EVENT.");
 		else
 			die("Could not open kprobe_events file: %s",
 			    strerror(errno));

From 769885f372300a7fcfb9e54e4e2990718d40b529 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 Dec 2009 22:48:32 -0200
Subject: [PATCH 021/640] perf header: Do_read shouldn't die
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Propagate the errors instead, its callers already propagate
other errors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262047716-23171-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/header.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index df237c3a041b..6b3cb94e8a2b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -432,19 +432,19 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 	return 0;
 }
 
-static void do_read(int fd, void *buf, size_t size)
+static int do_read(int fd, void *buf, size_t size)
 {
 	while (size) {
 		int ret = read(fd, buf, size);
 
-		if (ret < 0)
-			die("failed to read");
-		if (ret == 0)
-			die("failed to read: missing data");
+		if (ret <= 0)
+			return -1;
 
 		size -= ret;
 		buf += ret;
 	}
+
+	return 0;
 }
 
 int perf_header__process_sections(struct perf_header *self, int fd,
@@ -455,7 +455,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 	int nr_sections;
 	int sec_size;
 	int idx = 0;
-	int err = 0, feat = 1;
+	int err = -1, feat = 1;
 
 	nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
 	if (!nr_sections)
@@ -469,8 +469,10 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 
 	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
-	do_read(fd, feat_sec, sec_size);
+	if (do_read(fd, feat_sec, sec_size))
+		goto out_free;
 
+	err = 0;
 	while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
 		if (perf_header__has_feat(self, feat)) {
 			struct perf_file_section *sec = &feat_sec[idx++];
@@ -481,18 +483,18 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 		}
 		++feat;
 	}
-
+out_free:
 	free(feat_sec);
 	return err;
-};
+}
 
 int perf_file_header__read(struct perf_file_header *self,
 			   struct perf_header *ph, int fd)
 {
 	lseek(fd, 0, SEEK_SET);
-	do_read(fd, self, sizeof(*self));
 
-	if (self->magic     != PERF_MAGIC ||
+	if (do_read(fd, self, sizeof(*self)) ||
+	    self->magic     != PERF_MAGIC ||
 	    self->attr_size != sizeof(struct perf_file_attr))
 		return -1;
 
@@ -558,7 +560,8 @@ int perf_header__read(struct perf_header *self, int fd)
 		struct perf_header_attr *attr;
 		off_t tmp;
 
-		do_read(fd, &f_attr, sizeof(f_attr));
+		if (do_read(fd, &f_attr, sizeof(f_attr)))
+			goto out_errno;
 		tmp = lseek(fd, 0, SEEK_CUR);
 
 		attr = perf_header_attr__new(&f_attr.attr);
@@ -569,7 +572,8 @@ int perf_header__read(struct perf_header *self, int fd)
 		lseek(fd, f_attr.ids.offset, SEEK_SET);
 
 		for (j = 0; j < nr_ids; j++) {
-			do_read(fd, &f_id, sizeof(f_id));
+			if (do_read(fd, &f_id, sizeof(f_id)))
+				goto out_errno;
 
 			if (perf_header_attr__add_id(attr, f_id) < 0) {
 				perf_header_attr__delete(attr);
@@ -589,7 +593,8 @@ int perf_header__read(struct perf_header *self, int fd)
 		events = malloc(f_header.event_types.size);
 		if (events == NULL)
 			return -ENOMEM;
-		do_read(fd, events, f_header.event_types.size);
+		if (do_read(fd, events, f_header.event_types.size))
+			goto out_errno;
 		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
 	}
 
@@ -599,6 +604,8 @@ int perf_header__read(struct perf_header *self, int fd)
 
 	self->frozen = 1;
 	return 0;
+out_errno:
+	return -errno;
 }
 
 u64 perf_header__sample_type(struct perf_header *header)

From ae99fb2c335ef018520950ddc9692faacab39cf2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 Dec 2009 22:48:33 -0200
Subject: [PATCH 022/640] perf header: perf_header__push_event() shouldn't die
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just propagate eventual errors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262047716-23171-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/header.c       | 16 ++++++++++------
 tools/perf/util/header.h       |  2 +-
 tools/perf/util/parse-events.c | 18 +++++++++++-------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 6b3cb94e8a2b..709e3252f049 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -105,24 +105,28 @@ struct perf_trace_event_type {
 static int event_count;
 static struct perf_trace_event_type *events;
 
-void perf_header__push_event(u64 id, const char *name)
+int perf_header__push_event(u64 id, const char *name)
 {
 	if (strlen(name) > MAX_EVENT_NAME)
 		pr_warning("Event %s will be truncated\n", name);
 
 	if (!events) {
 		events = malloc(sizeof(struct perf_trace_event_type));
-		if (!events)
-			die("nomem");
+		if (events == NULL)
+			return -ENOMEM;
 	} else {
-		events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type));
-		if (!events)
-			die("nomem");
+		struct perf_trace_event_type *nevents;
+
+		nevents = realloc(events, (event_count + 1) * sizeof(*events));
+		if (nevents == NULL)
+			return -ENOMEM;
+		events = nevents;
 	}
 	memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
 	events[event_count].event_id = id;
 	strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
 	event_count++;
+	return 0;
 }
 
 char *perf_header__find_event(u64 id)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d118d05d3abe..2b69aab67e35 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -64,7 +64,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit);
 int perf_header__add_attr(struct perf_header *self,
 			  struct perf_header_attr *attr);
 
-void perf_header__push_event(u64 id, const char *name);
+int perf_header__push_event(u64 id, const char *name);
 char *perf_header__find_event(u64 id);
 
 struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dc585a835cab..609d5a9470c5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -753,11 +753,11 @@ modifier:
 	return ret;
 }
 
-static void store_event_type(const char *orgname)
+static int store_event_type(const char *orgname)
 {
 	char filename[PATH_MAX], *c;
 	FILE *file;
-	int id;
+	int id, n;
 
 	sprintf(filename, "%s/", debugfs_path);
 	strncat(filename, orgname, strlen(orgname));
@@ -769,11 +769,14 @@ static void store_event_type(const char *orgname)
 
 	file = fopen(filename, "r");
 	if (!file)
-		return;
-	if (fscanf(file, "%i", &id) < 1)
-		die("cannot store event ID");
+		return 0;
+	n = fscanf(file, "%i", &id);
 	fclose(file);
-	perf_header__push_event(id, orgname);
+	if (n < 1) {
+		pr_err("cannot store event ID\n");
+		return -EINVAL;
+	}
+	return perf_header__push_event(id, orgname);
 }
 
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
@@ -782,7 +785,8 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 	enum event_result ret;
 
 	if (strchr(str, ':'))
-		store_event_type(str);
+		if (store_event_type(str) < 0)
+			return -1;
 
 	for (;;) {
 		if (nr_counters == MAX_COUNTERS)

From 71289be7630fb97f2de6bb2e18a50289dc869f9d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 Dec 2009 22:48:34 -0200
Subject: [PATCH 023/640] perf report: Add --hide-unresolved/-U command line
 option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Useful to match the 'overhead' column in 'perf report' with the
'baseline' one in 'perf diff'.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262047716-23171-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 508934b0140a..4292d7afcd60 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -34,6 +34,7 @@
 static char		const *input_name = "perf.data";
 
 static int		force;
+static bool		hide_unresolved;
 
 static int		show_threads;
 static struct perf_read_values	show_threads_values;
@@ -121,7 +122,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 		return -1;
 	}
 
-	if (al.filtered)
+	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
 	if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@ -342,6 +343,8 @@ static const struct option options[] = {
 	OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
 		   "separator for columns, no spaces will be added between "
 		   "columns '.' is reserved."),
+	OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+		    "Only display entries resolved to a symbol"),
 	OPT_END()
 };
 

From cdbae31408cf39372402076cf2e189ec693daa71 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 Dec 2009 22:48:35 -0200
Subject: [PATCH 024/640] perf diff: Don't add the period for unresolved
 symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we don't add histograms buckets for them, this way the sum
of baselines should be 100%.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262047716-23171-4-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 1cbecaf029fa..876a4b981be8 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -51,12 +51,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
 		return -1;
 	}
 
-	if (al.filtered)
+	if (al.filtered || al.sym == NULL)
 		return 0;
 
 	event__parse_sample(event, session->sample_type, &data);
 
-	if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
+	if (perf_session__add_hist_entry(session, &al, data.period)) {
 		pr_warning("problem incrementing symbol count, skipping event\n");
 		return -1;
 	}

From 9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 28 Dec 2009 22:48:36 -0200
Subject: [PATCH 025/640] perf diff: Fix support for all --sort combinations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we finish creating the hist_entries we _already_ have them
sorted "by name", in fact by what is in --sort, that is exactly
how we can find the pairs in perf_session__match_hists as
'comm', 'dso' & 'symbol' all are strings we need to find the
matches in the baseline session.

So only do the sort by hits followed by a resort by --sort if we
need to find the position for shwowing the --displacement of
hist entries.

Now all these modes work correctly:

Example is a simple 'perf record -f find / > /dev/null' ran
twice then followed by the following commands:

  $ perf diff -f --sort comm
  # Baseline  Delta      Command
  # ........ ..........  .......
  #
       0.00%   +100.00%     find
  $ perf diff -f --sort dso
  # Baseline  Delta           Shared Object
  # ........ ..........  ..................
  #
      59.97%     -0.44%  [kernel]
      21.17%     +0.28%  libc-2.5.so
      18.49%     +0.16%  [ext3]
       0.37%             find
  $ perf diff -f --sort symbol | head -8
  # Baseline  Delta      Symbol
  # ........ ..........  ......
  #
       6.21%     +0.36%  [k] ext3fs_dirhash
       3.43%     +0.41%  [.] __GI_strlen
       3.53%     +0.16%  [k] __kmalloc
       3.17%     +0.49%  [k] system_call
       3.06%     +0.37%  [k] ext3_htree_store_dirent
  $ perf diff -f --sort dso,symbol | head -8
  # Baseline  Delta           Shared Object  Symbol
  # ........ ..........  ..................  ......
  #
       6.21%     +0.36%  [ext3]              [k] ext3fs_dirhash
       3.43%     +0.41%  libc-2.5.so         [.] __GI_strlen
       3.53%     +0.16%  [kernel]            [k] __kmalloc
       3.17%     +0.49%  [kernel]            [k] system_call
       3.06%     +0.37%  [ext3]              [k] ext3_htree_store_dirent
  $

And we don't have to do two expensive resorts in the common, non
--displacement case.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262047716-23171-5-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-diff.c | 52 ++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 31 deletions(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 876a4b981be8..924bfb77a6ab 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -82,29 +82,19 @@ static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
 	struct hist_entry *iter;
 
 	while (*p != NULL) {
-		int cmp;
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node);
-
-		cmp = strcmp(he->map->dso->name, iter->map->dso->name);
-		if (cmp > 0)
+		if (hist_entry__cmp(he, iter) < 0)
 			p = &(*p)->rb_left;
-		else if (cmp < 0)
+		else
 			p = &(*p)->rb_right;
-		else {
-			cmp = strcmp(he->sym->name, iter->sym->name);
-			if (cmp > 0)
-				p = &(*p)->rb_left;
-			else
-				p = &(*p)->rb_right;
-		}
 	}
 
 	rb_link_node(&he->rb_node, parent, p);
 	rb_insert_color(&he->rb_node, root);
 }
 
-static void perf_session__resort_by_name(struct perf_session *self)
+static void perf_session__resort_hist_entries(struct perf_session *self)
 {
 	unsigned long position = 1;
 	struct rb_root tmp = RB_ROOT;
@@ -122,29 +112,28 @@ static void perf_session__resort_by_name(struct perf_session *self)
 	self->hists = tmp;
 }
 
+static void perf_session__set_hist_entries_positions(struct perf_session *self)
+{
+	perf_session__output_resort(self, self->events_stats.total);
+	perf_session__resort_hist_entries(self);
+}
+
 static struct hist_entry *
-perf_session__find_hist_entry_by_name(struct perf_session *self,
-				      struct hist_entry *he)
+perf_session__find_hist_entry(struct perf_session *self,
+			      struct hist_entry *he)
 {
 	struct rb_node *n = self->hists.rb_node;
 
 	while (n) {
 		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
-		int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+		int64_t cmp = hist_entry__cmp(he, iter);
 
-		if (cmp > 0)
+		if (cmp < 0)
 			n = n->rb_left;
-		else if (cmp < 0)
+		else if (cmp > 0)
 			n = n->rb_right;
-		else {
-			cmp = strcmp(he->sym->name, iter->sym->name);
-			if (cmp > 0)
-				n = n->rb_left;
-			else if (cmp < 0)
-				n = n->rb_right;
-			else
-				return iter;
-		}
+		else 
+			return iter;
 	}
 
 	return NULL;
@@ -155,11 +144,9 @@ static void perf_session__match_hists(struct perf_session *old_session,
 {
 	struct rb_node *nd;
 
-	perf_session__resort_by_name(old_session);
-
 	for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
 		struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
-		pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
+		pos->pair = perf_session__find_hist_entry(old_session, pos);
 	}
 }
 
@@ -177,9 +164,12 @@ static int __cmd_diff(void)
 		ret = perf_session__process_events(session[i], &event_ops);
 		if (ret)
 			goto out_delete;
-		perf_session__output_resort(session[i], session[i]->events_stats.total);
 	}
 
+	perf_session__output_resort(session[1], session[1]->events_stats.total);
+	if (show_displacement)
+		perf_session__set_hist_entries_positions(session[0]);
+
 	perf_session__match_hists(session[0], session[1]);
 	perf_session__fprintf_hists(session[1], session[0],
 				    show_displacement, stdout);

From 6a5fa2362b628ee950080bef8895a6fb62f58ab4 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 1 Jan 2010 01:28:43 +0000
Subject: [PATCH 026/640] [CIFS] Add support for TCP_NODELAY

mount option sockopt=TCP_NODELAY helpful for faster networks
boosting performance.  Kernel bugzilla bug number 14032.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES    |  4 ++++
 fs/cifs/cifsfs.h   |  2 +-
 fs/cifs/cifsglob.h |  1 +
 fs/cifs/connect.c  | 30 ++++++++++++++++++++++++++----
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 7b2600b380d7..49503d2edc7e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.62
+------------
+Add sockopt=TCP_NODELAY mount option.
+
 Version 1.61
 ------------
 Fix append problem to Samba servers (files opened with O_APPEND could
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ac2b24c192f8..78c1b86d55f6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.61"
+#define CIFS_VERSION   "1.62"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4b35f7ec0583..ed751bb657db 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -149,6 +149,7 @@ struct TCP_Server_Info {
 	bool svlocal:1;			/* local server or remote */
 	bool noblocksnd;		/* use blocking sendmsg */
 	bool noautotune;		/* do not autotune send buf sizes */
+	bool tcp_nodelay;
 	atomic_t inFlight;  /* number of requests on the wire to server */
 #ifdef CONFIG_CIFS_STATS2
 	atomic_t inSend; /* requests trying to send */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3bbcaa716b3c..2e9e09ca0e30 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -98,7 +98,7 @@ struct smb_vol {
 	bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
 	unsigned int rsize;
 	unsigned int wsize;
-	unsigned int sockopt;
+	bool sockopt_tcp_nodelay:1;
 	unsigned short int port;
 	char *prepath;
 };
@@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname,
 					simple_strtoul(value, &value, 0);
 			}
 		} else if (strnicmp(data, "sockopt", 5) == 0) {
-			if (value && *value) {
-				vol->sockopt =
-					simple_strtoul(value, &value, 0);
+			if (!value || !*value) {
+				cERROR(1, ("no socket option specified"));
+				continue;
+			} else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
+				vol->sockopt_tcp_nodelay = 1;
 			}
 		} else if (strnicmp(data, "netbiosname", 4) == 0) {
 			if (!value || !*value || (*value == ' ')) {
@@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 
 	tcp_ses->noblocksnd = volume_info->noblocksnd;
 	tcp_ses->noautotune = volume_info->noautotune;
+	tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
 	atomic_set(&tcp_ses->inFlight, 0);
 	init_waitqueue_head(&tcp_ses->response_q);
 	init_waitqueue_head(&tcp_ses->request_q);
@@ -1764,6 +1767,7 @@ static int
 ipv4_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
+	int val;
 	bool connected = false;
 	__be16 orig_port = 0;
 	struct socket *socket = server->ssocket;
@@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server)
 			socket->sk->sk_rcvbuf = 140 * 1024;
 	}
 
+	if (server->tcp_nodelay) {
+		val = 1;
+		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+				(char *)&val, sizeof(val));
+		if (rc)
+			cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+	}
+
 	 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
 		 socket->sk->sk_sndbuf,
 		 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
@@ -1916,6 +1928,7 @@ static int
 ipv6_connect(struct TCP_Server_Info *server)
 {
 	int rc = 0;
+	int val;
 	bool connected = false;
 	__be16 orig_port = 0;
 	struct socket *socket = server->ssocket;
@@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server)
 	 */
 	socket->sk->sk_rcvtimeo = 7 * HZ;
 	socket->sk->sk_sndtimeo = 5 * HZ;
+
+	if (server->tcp_nodelay) {
+		val = 1;
+		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+				(char *)&val, sizeof(val));
+		if (rc)
+			cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+	}
+
 	server->ssocket = socket;
 
 	return rc;

From 9f0727f5f41d21eb3da67e50965d1f30e054795f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 4 Jan 2010 18:24:50 +0000
Subject: [PATCH 027/640] mx31ads: Allow enable/disable of switchable supplies

They will be automatically powered off at startup so users will
need to enable them for use.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx3/mx31ads.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c
index 3e7bafa2ddbb..609d402cadba 100644
--- a/arch/arm/mach-mx3/mx31ads.c
+++ b/arch/arm/mach-mx3/mx31ads.c
@@ -302,6 +302,7 @@ static struct regulator_init_data ldo1_data = {
 		.min_uV = 2800000,
 		.max_uV = 2800000,
 		.valid_modes_mask = REGULATOR_MODE_NORMAL,
+		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
 		.apply_uV = 1,
 	},
 };
@@ -322,6 +323,7 @@ static struct regulator_init_data ldo2_data = {
 		.min_uV = 3300000,
 		.max_uV = 3300000,
 		.valid_modes_mask = REGULATOR_MODE_NORMAL,
+		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
 		.apply_uV = 1,
 	},
 	.num_consumer_supplies = ARRAY_SIZE(ldo2_consumers),

From bd02acdbb28be99ed87ec11c3ef61a5eec4e2dd7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 16:05:15 +0000
Subject: [PATCH 028/640] mx31ads: Provide a name for EXPIO interrupt chip

This makes it a bit more obvious in genirq diagnostics that they
aren't handled by the i.MX interrupt controller.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx3/mx31ads.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c
index 609d402cadba..e2054563027f 100644
--- a/arch/arm/mach-mx3/mx31ads.c
+++ b/arch/arm/mach-mx3/mx31ads.c
@@ -173,6 +173,7 @@ static void expio_unmask_irq(u32 irq)
 }
 
 static struct irq_chip expio_irq_chip = {
+	.name = "EXPIO(CPLD)",
 	.ack = expio_ack_irq,
 	.mask = expio_mask_irq,
 	.unmask = expio_unmask_irq,

From 3d661ac187e72af71d3bb7d48a46012180a6fc46 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 16:05:16 +0000
Subject: [PATCH 029/640] mx31ads: Provide an IRQ range to the WM835x on the
 1133-EV1 module

The WM8350 core won't actually use the range yet, but it will in
future and the platform data to configure it is there now.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx3/mx31ads.c           | 1 +
 arch/arm/plat-mxc/include/mach/irqs.h | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm/mach-mx3/mx31ads.c b/arch/arm/mach-mx3/mx31ads.c
index e2054563027f..938c549767dc 100644
--- a/arch/arm/mach-mx3/mx31ads.c
+++ b/arch/arm/mach-mx3/mx31ads.c
@@ -462,6 +462,7 @@ static int mx31_wm8350_init(struct wm8350 *wm8350)
 
 static struct wm8350_platform_data __initdata mx31_wm8350_pdata = {
 	.init = mx31_wm8350_init,
+	.irq_base = MXC_BOARD_IRQ_START + MXC_MAX_EXP_IO_LINES,
 };
 #endif
 
diff --git a/arch/arm/plat-mxc/include/mach/irqs.h b/arch/arm/plat-mxc/include/mach/irqs.h
index ead9d592168d..0cb347645db4 100644
--- a/arch/arm/plat-mxc/include/mach/irqs.h
+++ b/arch/arm/plat-mxc/include/mach/irqs.h
@@ -37,7 +37,12 @@
  * within sensible limits.
  */
 #define MXC_BOARD_IRQ_START	(MXC_INTERNAL_IRQS + MXC_GPIO_IRQS)
+
+#ifdef CONFIG_MACH_MX31ADS_WM1133_EV1
+#define MXC_BOARD_IRQS  80
+#else
 #define MXC_BOARD_IRQS	16
+#endif
 
 #define MXC_IPU_IRQ_START	(MXC_BOARD_IRQ_START + MXC_BOARD_IRQS)
 

From 1dd473fdf1d8a7531e0955480cd129f9c1e8b8a3 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Tue, 12 Jan 2010 03:37:45 +0900
Subject: [PATCH 030/640] ocfs2: Fix refcnt leak on ocfs2_fast_follow_link()
 error path

If ->follow_link handler returns an error, it should decrement
nd->path refcnt. But ocfs2_fast_follow_link() doesn't decrement.

This patch fixes the problem by using nd_set_link() style error handling
instead of playing with nd->path.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/symlink.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 49b133ccbf11..32499d213fc4 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
 	}
 
 	memcpy(link, target, len);
-	nd_set_link(nd, link);
 
 bail:
+	nd_set_link(nd, status ? ERR_PTR(status) : link);
 	brelse(bh);
 
 	mlog_exit(status);
-	return status ? ERR_PTR(status) : link;
+	return NULL;
 }
 
 static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
-	char *link = cookie;
-
-	kfree(link);
+	char *link = nd_get_link(nd);
+	if (!IS_ERR(link))
+		kfree(link);
 }
 
 const struct inode_operations ocfs2_symlink_inode_operations = {

From 7b4e08a77f0cbfe31b47faf082caa02f9c252266 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 11 Jan 2010 16:33:18 +0000
Subject: [PATCH 031/640] MXC: Add AUDMUXv2 register decode to debugfs

Since AUDMUX configuration appears to be one of the common stumbling
blocks for people setting up i.MX audio try to provide some diagnostic
information describing the current setup to assisist people in working
out what's going on.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/plat-mxc/audmux-v2.c | 137 ++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/arch/arm/plat-mxc/audmux-v2.c b/arch/arm/plat-mxc/audmux-v2.c
index 6f21096086fd..b06954a84436 100644
--- a/arch/arm/plat-mxc/audmux-v2.c
+++ b/arch/arm/plat-mxc/audmux-v2.c
@@ -23,6 +23,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <mach/audmux.h>
 #include <mach/hardware.h>
 
@@ -32,6 +33,140 @@ static void __iomem *audmux_base;
 #define MXC_AUDMUX_V2_PTCR(x)		((x) * 8)
 #define MXC_AUDMUX_V2_PDCR(x)		((x) * 8 + 4)
 
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *audmux_debugfs_root;
+
+static int audmux_open_file(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+/* There is an annoying discontinuity in the SSI numbering with regard
+ * to the Linux number of the devices */
+static const char *audmux_port_string(int port)
+{
+	switch (port) {
+	case MX31_AUDMUX_PORT1_SSI0:
+		return "imx-ssi.0";
+	case MX31_AUDMUX_PORT2_SSI1:
+		return "imx-ssi.1";
+	case MX31_AUDMUX_PORT3_SSI_PINS_3:
+		return "SSI3";
+	case MX31_AUDMUX_PORT4_SSI_PINS_4:
+		return "SSI4";
+	case MX31_AUDMUX_PORT5_SSI_PINS_5:
+		return "SSI5";
+	case MX31_AUDMUX_PORT6_SSI_PINS_6:
+		return "SSI6";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static ssize_t audmux_read_file(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	int port = (int)file->private_data;
+	u32 pdcr, ptcr;
+
+	if (!buf)
+		return -ENOMEM;
+
+	if (audmux_clk)
+		clk_enable(audmux_clk);
+
+	ptcr = readl(audmux_base + MXC_AUDMUX_V2_PTCR(port));
+	pdcr = readl(audmux_base + MXC_AUDMUX_V2_PDCR(port));
+
+	if (audmux_clk)
+		clk_disable(audmux_clk);
+
+	ret = snprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n",
+		       pdcr, ptcr);
+
+	if (ptcr & MXC_AUDMUX_V2_PTCR_TFSDIR)
+		ret += snprintf(buf + ret, PAGE_SIZE - ret,
+				"TxFS output from %s, ",
+				audmux_port_string((ptcr >> 27) & 0x7));
+	else
+		ret += snprintf(buf + ret, PAGE_SIZE - ret,
+				"TxFS input, ");
+
+	if (ptcr & MXC_AUDMUX_V2_PTCR_TCLKDIR)
+		ret += snprintf(buf + ret, PAGE_SIZE - ret,
+				"TxClk output from %s",
+				audmux_port_string((ptcr >> 22) & 0x7));
+	else
+		ret += snprintf(buf + ret, PAGE_SIZE - ret,
+				"TxClk input");
+
+	ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	if (ptcr & MXC_AUDMUX_V2_PTCR_SYN) {
+		ret += snprintf(buf + ret, PAGE_SIZE - ret,
+				"Port is symmetric");
+	} else {
+		if (ptcr & MXC_AUDMUX_V2_PTCR_RFSDIR)
+			ret += snprintf(buf + ret, PAGE_SIZE - ret,
+					"RxFS output from %s, ",
+					audmux_port_string((ptcr >> 17) & 0x7));
+		else
+			ret += snprintf(buf + ret, PAGE_SIZE - ret,
+					"RxFS input, ");
+
+		if (ptcr & MXC_AUDMUX_V2_PTCR_RCLKDIR)
+			ret += snprintf(buf + ret, PAGE_SIZE - ret,
+					"RxClk output from %s",
+					audmux_port_string((ptcr >> 12) & 0x7));
+		else
+			ret += snprintf(buf + ret, PAGE_SIZE - ret,
+					"RxClk input");
+	}
+
+	ret += snprintf(buf + ret, PAGE_SIZE - ret,
+			"\nData received from %s\n",
+			audmux_port_string((pdcr >> 13) & 0x7));
+
+	ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
+
+	kfree(buf);
+
+	return ret;
+}
+
+static const struct file_operations audmux_debugfs_fops = {
+	.open = audmux_open_file,
+	.read = audmux_read_file,
+};
+
+static void audmux_debugfs_init(void)
+{
+	int i;
+	char buf[20];
+
+	audmux_debugfs_root = debugfs_create_dir("audmux", NULL);
+	if (!audmux_debugfs_root) {
+		pr_warning("Failed to create AUDMUX debugfs root\n");
+		return;
+	}
+
+	for (i = 1; i < 8; i++) {
+		snprintf(buf, sizeof(buf), "ssi%d", i);
+		if (!debugfs_create_file(buf, 0444, audmux_debugfs_root,
+					 (void *)i, &audmux_debugfs_fops))
+			pr_warning("Failed to create AUDMUX port %d debugfs file\n",
+				   i);
+	}
+}
+#else
+static inline void audmux_debugfs_init(void)
+{
+}
+#endif
+
 int mxc_audmux_v2_configure_port(unsigned int port, unsigned int ptcr,
 		unsigned int pdcr)
 {
@@ -68,6 +203,8 @@ static int mxc_audmux_v2_init(void)
 	if (cpu_is_mx31() || cpu_is_mx35())
 		audmux_base = IO_ADDRESS(AUDMUX_BASE_ADDR);
 
+	audmux_debugfs_init();
+
 	return 0;
 }
 

From 0fb8ee48d9dfff6a0913ceb0be2068d8be203763 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 31 Dec 2009 05:53:03 +0100
Subject: [PATCH 032/640] perf: Drop useless check for ignored frame

The check that ignores the debug and nmi stack frames is useless
now that we have a frame pointer that makes us start at the
right place. We don't anymore have to deal with these.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262235183-5320-2-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/stacktrace.h | 2 --
 arch/x86/kernel/cpu/perf_event.c  | 8 --------
 arch/x86/kernel/dumpstack_32.c    | 5 -----
 arch/x86/kernel/dumpstack_64.c    | 5 -----
 4 files changed, 20 deletions(-)

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e89122a42f..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
 
 extern int kstack_depth_to_print;
 
-int x86_is_stack_id(int id, char *name);
-
 struct thread_info;
 struct stacktrace_ops;
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index d616c06e99b4..b1bb8c550526 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2297,7 +2297,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_ignored_frame);
 
 
 static void
@@ -2313,10 +2312,6 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	per_cpu(in_ignored_frame, smp_processor_id()) =
-			x86_is_stack_id(NMI_STACK, name) ||
-			x86_is_stack_id(DEBUG_STACK, name);
-
 	return 0;
 }
 
@@ -2324,9 +2319,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (per_cpu(in_ignored_frame, smp_processor_id()))
-		return;
-
 	if (reliable)
 		callchain_store(entry, addr);
 }
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca47b25..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
 
 #include "dumpstack.h"
 
-/* Just a stub for now */
-int x86_is_stack_id(int id, char *name)
-{
-	return 0;
-}
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 0ad9597073f5..676bc051252e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
 #endif
 };
 
-int x86_is_stack_id(int id, char *name)
-{
-	return x86_stack_ids[id - 1] == name;
-}
-
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 					 unsigned *usedp, char **idp)
 {

From 60666c630bdb33983a894b050b588b663f38f368 Mon Sep 17 00:00:00 2001
From: Liming Wang <liming.wang@windriver.com>
Date: Thu, 31 Dec 2009 16:05:50 +0800
Subject: [PATCH 033/640] perf tools: Fix --pid option for stat

current pid option doesn't work for perf stat. Change it to what
perf record --pid acts as.

Signed-off-by: Liming Wang <liming.wang@windriver.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1262246750-2191-1-git-send-email-liming.wang@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 106 ++++++++++++++++++++++----------------
 1 file changed, 61 insertions(+), 45 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c70d72003557..e8c85d5aec41 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,6 +44,7 @@
 #include "util/parse-events.h"
 #include "util/event.h"
 #include "util/debug.h"
+#include "util/header.h"
 
 #include <sys/prctl.h>
 #include <math.h>
@@ -79,6 +80,8 @@ static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int			event_scaled[MAX_COUNTERS];
 
+static volatile int done = 0;
+
 struct stats
 {
 	double n, mean, M2;
@@ -247,61 +250,64 @@ static int run_perf_stat(int argc __used, const char **argv)
 	unsigned long long t0, t1;
 	int status = 0;
 	int counter;
-	int pid;
+	int pid = target_pid;
 	int child_ready_pipe[2], go_pipe[2];
+	const bool forks = (target_pid == -1 && argc > 0);
 	char buf;
 
 	if (!system_wide)
 		nr_cpus = 1;
 
-	if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
+	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
 		perror("failed to create pipes");
 		exit(1);
 	}
 
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
+	if (forks) {
+		if ((pid = fork()) < 0)
+			perror("failed to fork");
 
-	if (!pid) {
-		close(child_ready_pipe[0]);
-		close(go_pipe[1]);
-		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+		if (!pid) {
+			close(child_ready_pipe[0]);
+			close(go_pipe[1]);
+			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+			/*
+			 * Do a dummy execvp to get the PLT entry resolved,
+			 * so we avoid the resolver overhead on the real
+			 * execvp call.
+			 */
+			execvp("", (char **)argv);
+
+			/*
+			 * Tell the parent we're ready to go
+			 */
+			close(child_ready_pipe[1]);
+
+			/*
+			 * Wait until the parent tells us to go.
+			 */
+			if (read(go_pipe[0], &buf, 1) == -1)
+				perror("unable to read pipe");
+
+			execvp(argv[0], (char **)argv);
+
+			perror(argv[0]);
+			exit(-1);
+		}
+
+		child_pid = pid;
 
 		/*
-		 * Do a dummy execvp to get the PLT entry resolved,
-		 * so we avoid the resolver overhead on the real
-		 * execvp call.
-		 */
-		execvp("", (char **)argv);
-
-		/*
-		 * Tell the parent we're ready to go
+		 * Wait for the child to be ready to exec.
 		 */
 		close(child_ready_pipe[1]);
-
-		/*
-		 * Wait until the parent tells us to go.
-		 */
-		if (read(go_pipe[0], &buf, 1) == -1)
+		close(go_pipe[0]);
+		if (read(child_ready_pipe[0], &buf, 1) == -1)
 			perror("unable to read pipe");
-
-		execvp(argv[0], (char **)argv);
-
-		perror(argv[0]);
-		exit(-1);
+		close(child_ready_pipe[0]);
 	}
 
-	child_pid = pid;
-
-	/*
-	 * Wait for the child to be ready to exec.
-	 */
-	close(child_ready_pipe[1]);
-	close(go_pipe[0]);
-	if (read(child_ready_pipe[0], &buf, 1) == -1)
-		perror("unable to read pipe");
-	close(child_ready_pipe[0]);
-
 	for (counter = 0; counter < nr_counters; counter++)
 		create_perf_stat_counter(counter, pid);
 
@@ -310,8 +316,12 @@ static int run_perf_stat(int argc __used, const char **argv)
 	 */
 	t0 = rdclock();
 
-	close(go_pipe[1]);
-	wait(&status);
+	if (forks) {
+		close(go_pipe[1]);
+		wait(&status);
+	} else {
+		while(!done);
+	}
 
 	t1 = rdclock();
 
@@ -417,10 +427,13 @@ static void print_stat(int argc, const char **argv)
 	fflush(stdout);
 
 	fprintf(stderr, "\n");
-	fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
-
-	for (i = 1; i < argc; i++)
-		fprintf(stderr, " %s", argv[i]);
+	fprintf(stderr, " Performance counter stats for ");
+	if(target_pid == -1) {
+		fprintf(stderr, "\'%s", argv[0]);
+		for (i = 1; i < argc; i++)
+			fprintf(stderr, " %s", argv[i]);
+	}else
+		fprintf(stderr, "task pid \'%d", target_pid);
 
 	fprintf(stderr, "\'");
 	if (run_count > 1)
@@ -445,6 +458,9 @@ static volatile int signr = -1;
 
 static void skip_signal(int signo)
 {
+	if(target_pid != -1)
+		done = 1;
+
 	signr = signo;
 }
 
@@ -461,7 +477,7 @@ static void sig_atexit(void)
 }
 
 static const char * const stat_usage[] = {
-	"perf stat [<options>] <command>",
+	"perf stat [<options>] [<command>]",
 	NULL
 };
 
@@ -492,7 +508,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 
 	argc = parse_options(argc, argv, options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);
-	if (!argc)
+	if (!argc && target_pid == -1)
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0)
 		usage_with_options(stat_usage, options);

From 682b335a5bccf9e5b7e74380784aa2f145d04444 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 4 Jan 2010 16:19:26 -0200
Subject: [PATCH 034/640] perf symbols: Generalise the kallsyms parsing routine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Will be used to find an specific symbol by name on 'perf record'
to support relocation reference symbols to support relocatable
kernels.

Still have to conver the perf trace tools to use it instead of
their current reimplementation.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262629169-22797-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 74 ++++++++++++++++++++++++++--------------
 tools/perf/util/symbol.h |  2 ++
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 79ca6a099f96..b9e0da57d84b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -383,16 +383,12 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 	return ret;
 }
 
-/*
- * Loads the function entries in /proc/kallsyms into kernel_map->dso,
- * so that we can in the next step set the symbol ->end address and then
- * call kernel_maps__split_kallsyms.
- */
-static int dso__load_all_kallsyms(struct dso *self, struct map *map)
+int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name,
+						     char type, u64 start))
 {
 	char *line = NULL;
 	size_t n;
-	struct rb_root *root = &self->symbols[map->type];
+	int err = 0;
 	FILE *file = fopen("/proc/kallsyms", "r");
 
 	if (file == NULL)
@@ -400,7 +396,6 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map)
 
 	while (!feof(file)) {
 		u64 start;
-		struct symbol *sym;
 		int line_len, len;
 		char symbol_type;
 		char *symbol_name;
@@ -421,35 +416,62 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map)
 			continue;
 
 		symbol_type = toupper(line[len]);
-		if (!symbol_type__is_a(symbol_type, map->type))
-			continue;
-
 		symbol_name = line + len + 2;
-		/*
-		 * Will fix up the end later, when we have all symbols sorted.
-		 */
-		sym = symbol__new(start, 0, symbol_name);
 
-		if (sym == NULL)
-			goto out_delete_line;
-		/*
-		 * We will pass the symbols to the filter later, in
-		 * map__split_kallsyms, when we have split the maps per module
-		 */
-		symbols__insert(root, sym);
+		err = process_symbol(arg, symbol_name, symbol_type, start);
+		if (err)
+			break;
 	}
 
 	free(line);
 	fclose(file);
+	return err;
 
-	return 0;
-
-out_delete_line:
-	free(line);
 out_failure:
 	return -1;
 }
 
+struct process_kallsyms_args {
+	struct map *map;
+	struct dso *dso;
+};
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+				       char type, u64 start)
+{
+	struct symbol *sym;
+	struct process_kallsyms_args *a = arg;
+	struct rb_root *root = &a->dso->symbols[a->map->type];
+
+	if (!symbol_type__is_a(type, a->map->type))
+		return 0;
+
+	/*
+	 * Will fix up the end later, when we have all symbols sorted.
+	 */
+	sym = symbol__new(start, 0, name);
+
+	if (sym == NULL)
+		return -ENOMEM;
+	/*
+	 * We will pass the symbols to the filter later, in
+	 * map__split_kallsyms, when we have split the maps per module
+	 */
+	symbols__insert(root, sym);
+	return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *self, struct map *map)
+{
+	struct process_kallsyms_args args = { .map = map, .dso = self, };
+	return kallsyms__parse(&args, map__process_kallsym_symbol);
+}
+
 /*
  * Split the symbols into maps, making sure there are no overlaps, i.e. the
  * kernel range is broken in several maps, named [kernel].N, as we don't have
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f27e158943e9..21313e87c37b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -135,6 +135,8 @@ int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
 bool dsos__read_build_ids(void);
 int build_id__sprintf(u8 *self, int len, char *bf);
+int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name,
+						     char type, u64 start));
 
 int symbol__init(void);
 int perf_session__create_kernel_maps(struct perf_session *self);

From 36a3e6461a0dac8e84b8c94877365324010c151b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 4 Jan 2010 16:19:27 -0200
Subject: [PATCH 035/640] perf symbols: Export symbol_type__is_a
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Will be needed by the new HEADER_DSO_INFO feature that will be a
HEADER_BUILD_ID superset, replacing it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262629169-22797-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 tools/perf/util/symbol.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b9e0da57d84b..5dffcd132d15 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -64,7 +64,7 @@ static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
 	self->sorted_by_name |= (1 << type);
 }
 
-static bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+bool symbol_type__is_a(char symbol_type, enum map_type map_type)
 {
 	switch (map_type) {
 	case MAP__FUNCTION:
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 21313e87c37b..b2b5330a82a0 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -139,6 +139,8 @@ int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name
 						     char type, u64 start));
 
 int symbol__init(void);
+bool symbol_type__is_a(char symbol_type, enum map_type map_type);
+
 int perf_session__create_kernel_maps(struct perf_session *self);
 
 extern struct list_head dsos__user, dsos__kernel;

From f92cb24c78a7c853435e46a20d1bd5c894378132 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 4 Jan 2010 16:19:28 -0200
Subject: [PATCH 036/640] perf tools: Create write_padded routine out of
 __dsos__write_buildid_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Will be used by other options where padding is needed.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262629169-22797-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/header.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 709e3252f049..942f7da8bf84 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -173,6 +173,20 @@ static int do_write(int fd, const void *buf, size_t size)
 	return 0;
 }
 
+#define NAME_ALIGN 64
+
+static int write_padded(int fd, const void *bf, size_t count,
+			size_t count_aligned)
+{
+	static const char zero_buf[NAME_ALIGN];
+	int err = do_write(fd, bf, count);
+
+	if (!err)
+		err = do_write(fd, zero_buf, count_aligned - count);
+
+	return err;
+}
+
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
@@ -181,9 +195,7 @@ static int do_write(int fd, const void *buf, size_t size)
 
 static int __dsos__write_buildid_table(struct list_head *head, int fd)
 {
-#define NAME_ALIGN	64
 	struct dso *pos;
-	static const char zero_buf[NAME_ALIGN];
 
 	dsos__for_each_with_build_id(pos, head) {
 		int err;
@@ -197,10 +209,8 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
 		err = do_write(fd, &b, sizeof(b));
 		if (err < 0)
 			return err;
-		err = do_write(fd, pos->long_name, pos->long_name_len + 1);
-		if (err < 0)
-			return err;
-		err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
+		err = write_padded(fd, pos->long_name,
+				   pos->long_name_len + 1, len);
 		if (err < 0)
 			return err;
 	}

From de1764892a61a3ed212973cc028c80dd083179dd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 4 Jan 2010 16:19:29 -0200
Subject: [PATCH 037/640] perf session: Keep pointers to the vmlinux maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So that tools such as 'perf probe' don't have to lookup
'[kernel.kallsyms]' but instead access them directly after
perf_session__create_kernel_maps or
map_groups__create_kernel_maps.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262629169-22797-4-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-probe.c |  4 +---
 tools/perf/util/session.h  |  1 +
 tools/perf/util/symbol.c   | 29 +++++++++++++----------------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c1e6774fd3ed..ffdd3fe87b4a 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -235,9 +235,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 	session.psession = perf_session__new(NULL, O_WRONLY, false);
 	if (session.psession == NULL)
 		die("Failed to init perf_session.");
-	session.kmap = map_groups__find_by_name(&session.psession->kmaps,
-						MAP__FUNCTION,
-						"[kernel.kallsyms]");
+	session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
 	if (!session.kmap)
 		die("Could not find kernel map.\n");
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 77c5ee2993c2..8db37bbf0e62 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -18,6 +18,7 @@ struct perf_session {
 	struct map_groups	kmaps;
 	struct rb_root		threads;
 	struct thread		*last_match;
+	struct map		*vmlinux_maps[MAP__NR_TYPES];
 	struct events_stats	events_stats;
 	unsigned long		event_total[PERF_RECORD_MAX];
 	unsigned long		unknown_events;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5dffcd132d15..e290429e9c00 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1662,7 +1662,7 @@ size_t dsos__fprintf_buildid(FILE *fp)
 		__dsos__fprintf_buildid(&dsos__user, fp));
 }
 
-static struct dso *dsos__create_kernel( const char *vmlinux)
+static struct dso *dsos__create_kernel(const char *vmlinux)
 {
 	struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
 
@@ -1691,29 +1691,26 @@ out_delete_kernel_dso:
 	return NULL;
 }
 
-static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux)
+static int map_groups__create_kernel_maps(struct map_groups *self,
+					  struct map *vmlinux_maps[MAP__NR_TYPES],
+					  const char *vmlinux)
 {
-	struct map *functions, *variables;
 	struct dso *kernel = dsos__create_kernel(vmlinux);
+	enum map_type type;
 
 	if (kernel == NULL)
 		return -1;
 
-	functions = map__new2(0, kernel, MAP__FUNCTION);
-	if (functions == NULL)
-		return -1;
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		vmlinux_maps[type] = map__new2(0, kernel, type);
+		if (vmlinux_maps[type] == NULL)
+			return -1;
 
-	variables = map__new2(0, kernel, MAP__VARIABLE);
-	if (variables == NULL) {
-		map__delete(functions);
-		return -1;
+		vmlinux_maps[type]->map_ip =
+			vmlinux_maps[type]->unmap_ip = identity__map_ip;
+		map_groups__insert(self, vmlinux_maps[type]);
 	}
 
-	functions->map_ip = functions->unmap_ip =
-		variables->map_ip = variables->unmap_ip = identity__map_ip;
-	map_groups__insert(self, functions);
-	map_groups__insert(self, variables);
-
 	return 0;
 }
 
@@ -1824,7 +1821,7 @@ out_free_comm_list:
 
 int perf_session__create_kernel_maps(struct perf_session *self)
 {
-	if (map_groups__create_kernel_maps(&self->kmaps,
+	if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps,
 					   symbol_conf.vmlinux_name) < 0)
 		return -1;
 

From b9a63b9b56d6910a25e3d4905525aef150420a9b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 5 Jan 2010 11:54:45 -0200
Subject: [PATCH 038/640] perf report: Fix --no-call-chain option handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid the funny:

 [root@doppio ~]# perf record -a -f sleep 2s
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.334 MB perf.data (~14572 samples) ]
 [root@doppio ~]# perf report --no-call-graph
 selected -g but no callchain data. Did you call perf record without -g?

And fix the bug reported by peterz when we do indeed record with
callchains and then ask for a report without:

[root@doppio ~]# perf record -a -g -f sleep 2s
[root@doppio ~]# perf report --no-call-graph
Segmentation fault
[root@doppio ~]#

Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262699685-27820-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4292d7afcd60..80d691a4191f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -35,6 +35,7 @@ static char		const *input_name = "perf.data";
 
 static int		force;
 static bool		hide_unresolved;
+static bool		dont_use_callchains;
 
 static int		show_threads;
 static struct perf_read_values	show_threads_values;
@@ -172,7 +173,8 @@ static int perf_session__setup_sample_type(struct perf_session *self)
 					" -g?\n");
 			return -1;
 		}
-	} else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
+	} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+		   !symbol_conf.use_callchain) {
 			symbol_conf.use_callchain = true;
 			if (register_callchain_param(&callchain_param) < 0) {
 				fprintf(stderr, "Can't register callchain"
@@ -246,11 +248,19 @@ out_delete:
 
 static int
 parse_callchain_opt(const struct option *opt __used, const char *arg,
-		    int unset __used)
+		    int unset)
 {
 	char *tok;
 	char *endptr;
 
+	/*
+	 * --no-call-graph
+	 */
+	if (unset) {
+		dont_use_callchains = true;
+		return 0;
+	}
+
 	symbol_conf.use_callchain = true;
 
 	if (!arg)

From 56b03f3c4d641dbdbce2e52a2969712e85b0e030 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 5 Jan 2010 16:50:31 -0200
Subject: [PATCH 039/640] perf tools: Handle relocatable kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DSOs don't have this problem because the kernel emits a
PERF_MMAP for each new executable mapping it performs on
monitored threads.

To fix the kernel case we simulate the same behaviour, by having
'perf record' to synthesize a PERF_MMAP for the kernel, encoded
like this:

[root@doppio ~]# perf record -a -f sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.344 MB perf.data (~15038 samples) ]
[root@doppio ~]# perf report -D | head -10

0xd0 [0x40]: event: 1
.
. ... raw event: size 64 bytes
.  0000:  01 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 ......@........
.  0010:  00 00 00 81 ff ff ff ff 00 00 00 00 00 00 00 00 ...............
.  0020:  00 00 00 00 00 00 00 00 5b 6b 65 72 6e 65 6c 2e ........  [kernel
.  0030:  6b 61 6c 6c 73 79 6d 73 2e 5f 74 65 78 74 5d 00  kallsyms._text]
.  0xd0
[0x40]: PERF_RECORD_MMAP 0/0: [0xffffffff81000000((nil)) @ (nil)]: [kernel.kallsyms._text]

I.e. we identify such event as having:

 .pid      = 0
 .filename = [kernel.kallsyms.REFNAME]
 .start    = REFNAME addr in /proc/kallsyms at 'perf record' time

and use now a hardcoded value of '.text' for REFNAME.

Then, later, in 'perf report', if there are any kernel hits and
thus we need to resolve kernel symbols, we search for REFNAME
and if its address changed, relocation happened and we thus must
change the kernel mapping routines to one that uses .pgoff as
the relocation to apply.

This way we use the same mechanism used for the other DSOs and
don't have to do a two pass in all the kernel symbols.

Reported-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
LKML-Reference: <1262717431-1246-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c |  7 ++++
 tools/perf/util/event.c     | 64 +++++++++++++++++++++++++++++++++++--
 tools/perf/util/event.h     |  4 +++
 tools/perf/util/session.c   | 46 ++++++++++++++++++++++++++
 tools/perf/util/session.h   | 10 ++++++
 tools/perf/util/symbol.c    |  7 ++--
 6 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 265425322734..8f88420e066b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -551,6 +551,13 @@ static int __cmd_record(int argc, const char **argv)
 			return err;
 	}
 
+	err = event__synthesize_kernel_mmap(process_synthesized_event,
+					    session, "_text");
+	if (err < 0) {
+		pr_err("Couldn't record kernel reference relocation symbol.\n");
+		return err;
+	}
+
 	if (!system_wide && profile_cpu == -1)
 		event__synthesize_thread(pid, process_synthesized_event,
 					 session);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bb0fd6da2d56..1a31feb9999f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -189,6 +189,50 @@ void event__synthesize_threads(int (*process)(event_t *event,
 	closedir(proc);
 }
 
+struct process_symbol_args {
+	const char *name;
+	u64	   start;
+};
+
+static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct process_symbol_args *args = arg;
+
+	if (!symbol_type__is_a(type, MAP__FUNCTION) || strcmp(name, args->name))
+		return 0;
+
+	args->start = start;
+	return 1;
+}
+
+int event__synthesize_kernel_mmap(int (*process)(event_t *event,
+						 struct perf_session *session),
+				  struct perf_session *session,
+				  const char *symbol_name)
+{
+	size_t size;
+	event_t ev = {
+		.header = { .type = PERF_RECORD_MMAP },
+	};
+	/*
+	 * We should get this from /sys/kernel/sections/.text, but till that is
+	 * available use this, and after it is use this as a fallback for older
+	 * kernels.
+	 */
+	struct process_symbol_args args = { .name = symbol_name, };
+
+	if (kallsyms__parse(&args, find_symbol_cb) <= 0)
+		return -ENOENT;
+
+	size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+			"[kernel.kallsyms.%s]", symbol_name) + 1;
+	size = ALIGN(size, sizeof(u64));
+	ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size));
+	ev.mmap.start = args.start;
+
+	return process(&ev, session);
+}
+
 static void thread__comm_adjust(struct thread *self)
 {
 	char *comm = self->comm;
@@ -240,9 +284,9 @@ int event__process_lost(event_t *self, struct perf_session *session)
 
 int event__process_mmap(event_t *self, struct perf_session *session)
 {
-	struct thread *thread = perf_session__findnew(session, self->mmap.pid);
-	struct map *map = map__new(&self->mmap, MAP__FUNCTION,
-				   session->cwd, session->cwdlen);
+	struct thread *thread;
+	struct map *map;
+	static const char kmmap_prefix[] = "[kernel.kallsyms.";
 
 	dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
 		    self->mmap.pid, self->mmap.tid,
@@ -251,6 +295,20 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 		    (void *)(long)self->mmap.pgoff,
 		    self->mmap.filename);
 
+	if (self->mmap.pid == 0 &&
+	    memcmp(self->mmap.filename, kmmap_prefix,
+		   sizeof(kmmap_prefix) - 1) == 0) {
+		const char *symbol_name = (self->mmap.filename +
+					   sizeof(kmmap_prefix) - 1);
+		perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
+							 self->mmap.start);
+		return 0;
+	}
+
+	thread = perf_session__findnew(session, self->mmap.pid);
+	map = map__new(&self->mmap, MAP__FUNCTION,
+		       session->cwd, session->cwdlen);
+
 	if (thread == NULL || map == NULL)
 		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
 	else
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 80fb3653c809..61fc0dc658c2 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -110,6 +110,10 @@ int event__synthesize_thread(pid_t pid,
 void event__synthesize_threads(int (*process)(event_t *event,
 					      struct perf_session *session),
 			       struct perf_session *session);
+int event__synthesize_kernel_mmap(int (*process)(event_t *event,
+						 struct perf_session *session),
+				  struct perf_session *session,
+				  const char *symbol_name);
 
 int event__process_comm(event_t *self, struct perf_session *session);
 int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7f0537d1add8..e0e6a075489e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -401,3 +401,49 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg)
 
 	return true;
 }
+
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+					     const char *symbol_name,
+					     u64 addr)
+{
+	char *bracket;
+
+	self->ref_reloc_sym.name = strdup(symbol_name);
+	if (self->ref_reloc_sym.name == NULL)
+		return -ENOMEM;
+
+	bracket = strchr(self->ref_reloc_sym.name, ']');
+	if (bracket)
+		*bracket = '\0';
+
+	self->ref_reloc_sym.addr = addr;
+	return 0;
+}
+
+static u64 map__reloc_map_ip(struct map *map, u64 ip)
+{
+	return ip + (s64)map->pgoff;
+}
+
+static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
+{
+	return ip - (s64)map->pgoff;
+}
+
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+				      u64 unrelocated_addr)
+{
+	enum map_type type;
+	s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr;
+
+	if (!reloc)
+		return;
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct map *map = self->vmlinux_maps[type];
+
+		map->map_ip = map__reloc_map_ip;
+		map->unmap_ip = map__reloc_unmap_ip;
+		map->pgoff = reloc;
+	}
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 8db37bbf0e62..d4a9d20f8d44 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -24,6 +24,10 @@ struct perf_session {
 	unsigned long		unknown_events;
 	struct rb_root		hists;
 	u64			sample_type;
+	struct {
+		const char	*name;
+		u64		addr;
+	}			ref_reloc_sym;
 	int			fd;
 	int			cwdlen;
 	char			*cwd;
@@ -59,4 +63,10 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg);
 
 int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
 
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+					     const char *symbol_name,
+					     u64 addr);
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+				      u64 unrelocated_addr);
+
 #endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e290429e9c00..da2f07f1af8f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -956,11 +956,15 @@ static int dso__load_sym(struct dso *self, struct map *map,
 
 	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
 		struct symbol *f;
-		const char *elf_name;
+		const char *elf_name = elf_sym__name(&sym, symstrs);
 		char *demangled = NULL;
 		int is_label = elf_sym__is_label(&sym);
 		const char *section_name;
 
+		if (kernel && session->ref_reloc_sym.name != NULL &&
+		    strcmp(elf_name, session->ref_reloc_sym.name) == 0)
+			perf_session__reloc_vmlinux_maps(session, sym.st_value);
+
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
@@ -973,7 +977,6 @@ static int dso__load_sym(struct dso *self, struct map *map,
 		if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
 			continue;
 
-		elf_name = elf_sym__name(&sym, symstrs);
 		section_name = elf_sec__name(&shdr, secstrs);
 
 		if (kernel || kmodule) {

From ec3a9039601af210fca4650d229621fe5a21df0b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:46:41 -0500
Subject: [PATCH 040/640] tracing/kprobe: Update example output in
 documentation

Update example output in documentation according to current
implementation.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100105224641.19431.34967.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/trace/kprobetrace.txt | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 47aabeebbdf6..c3eff6ff945f 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -97,23 +97,24 @@ recording return value as "myretprobe" event.
 
   cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
 name: myprobe
-ID: 75
+ID: 780
 format:
-	field:unsigned short common_type;	offset:0;	size:2;
-	field:unsigned char common_flags;	offset:2;	size:1;
-	field:unsigned char common_preempt_count;	offset:3;	size:1;
-	field:int common_pid;	offset:4;	size:4;
-	field:int common_tgid;	offset:8;	size:4;
+        field:unsigned short common_type;       offset:0;       size:2; signed:0;
+        field:unsigned char common_flags;       offset:2;       size:1; signed:0;
+        field:unsigned char common_preempt_count;       offset:3; size:1;signed:0;
+        field:int common_pid;   offset:4;       size:4; signed:1;
+        field:int common_lock_depth;    offset:8;       size:4; signed:1;
 
-	field: unsigned long ip;	offset:16;tsize:8;
-	field: int nargs;	offset:24;tsize:4;
-	field: unsigned long dfd;	offset:32;tsize:8;
-	field: unsigned long filename;	offset:40;tsize:8;
-	field: unsigned long flags;	offset:48;tsize:8;
-	field: unsigned long mode;	offset:56;tsize:8;
+        field:unsigned long __probe_ip; offset:12;      size:4; signed:0;
+        field:int __probe_nargs;        offset:16;      size:4; signed:1;
+        field:unsigned long dfd;        offset:20;      size:4; signed:0;
+        field:unsigned long filename;   offset:24;      size:4; signed:0;
+        field:unsigned long flags;      offset:28;      size:4; signed:0;
+        field:unsigned long mode;       offset:32;      size:4; signed:0;
 
-print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
 
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
+REC->dfd, REC->filename, REC->flags, REC->mode
 
  You can see that the event has 4 arguments as in the expressions you specified.
 

From 14640106f243a3b29944d7198569090fa6546f2d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:46:48 -0500
Subject: [PATCH 041/640] tracing/kprobe: Drop function argument access syntax

Drop function argument access syntax, because the function
arguments depend on not only architecture but also
compile-options and function API. And now, we have perf-probe
for finding register/memory assigned to each argument.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: linuxppc-dev@ozlabs.org
LKML-Reference: <20100105224648.19431.52309.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/trace/kprobetrace.txt | 21 ++++++++++-----------
 kernel/trace/trace_kprobe.c         | 18 +-----------------
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index c3eff6ff945f..f30978e001f8 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -37,15 +37,12 @@ Synopsis of kprobe_events
   @SYM[+|-offs]	: Fetch memory at SYM +|- offs (SYM should be a data symbol)
   $stackN	: Fetch Nth entry of stack (N >= 0)
   $stack	: Fetch stack address.
-  $argN		: Fetch function argument. (N >= 0)(*)
-  $retval	: Fetch return value.(**)
-  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+  $retval	: Fetch return value.(*)
+  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
 
-  (*) aN may not correct on asmlinkaged functions and at the middle of
-      function body.
-  (**) only for return probe.
-  (***) this is useful for fetching a field of data structures.
+  (*) only for return probe.
+  (**) this is useful for fetching a field of data structures.
 
 
 Per-Probe Event Filtering
@@ -82,11 +79,14 @@ Usage examples
 To add a probe as a new event, write a new definition to kprobe_events
 as below.
 
-  echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+  echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events
 
  This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event. As this example shows, users can
-choose more familiar names for each arguments.
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
 
   echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
 
@@ -147,4 +147,3 @@ events, you need to enable it.
 returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
 returns from do_sys_open to sys_open+0x1b).
 
-
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 47f54ab57b68..7ac728ded964 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
 	return retval;
 }
 
-static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
-{
-	return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
-}
-
 static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
 					      void *dummy)
 {
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
 {
 	int ret = -EINVAL;
 
-	if (ff->func == fetch_argument)
-		ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
-	else if (ff->func == fetch_register) {
+	if (ff->func == fetch_register) {
 		const char *name;
 		name = regs_query_register_name((unsigned int)((long)ff->data));
 		ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
 			}
 		} else
 			ret = -EINVAL;
-	} else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
-		ret = strict_strtoul(arg + 3, 10, &param);
-		if (ret || param > PARAM_MAX_ARGS)
-			ret = -EINVAL;
-		else {
-			ff->func = fetch_argument;
-			ff->data = (void *)param;
-		}
 	} else
 		ret = -EINVAL;
 	return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
 	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
 	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
 	 * Fetch args:
-	 *  $argN	: fetch Nth of function argument. (N:0-)
 	 *  $retval	: fetch return value
 	 *  $stack	: fetch stack address
 	 *  $stackN	: fetch Nth of stack (N:0-)

From aa5add93e92019018e905146f8c3d3f8e3c08300 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:46:56 -0500
Subject: [PATCH 042/640] x86/ptrace: Remove unused regs_get_argument_nth API

Because of dropping function argument syntax from kprobe-tracer,
we don't need this API anymore.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linuxppc-dev@ozlabs.org
LKML-Reference: <20100105224656.19431.92588.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ptrace.h |  4 ----
 arch/x86/kernel/ptrace.c      | 24 ------------------------
 2 files changed, 28 deletions(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f680321..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 		return 0;
 }
 
-/* Get Nth argument at function call */
-extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
-					   unsigned int n);
-
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 017d937639fe..73554a3aae8c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@ static const int arg_offs_table[] = {
 #endif
 };
 
-/**
- * regs_get_argument_nth() - get Nth argument at function call
- * @regs:	pt_regs which contains registers at function entry.
- * @n:		argument number.
- *
- * regs_get_argument_nth() returns @n th argument of a function call.
- * Since usually the kernel stack will be changed right after function entry,
- * you must use this at function entry. If the @n th entry is NOT in the
- * kernel stack or pt_regs, this returns 0.
- */
-unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
-{
-	if (n < ARRAY_SIZE(arg_offs_table))
-		return *(unsigned long *)((char *)regs + arg_offs_table[n]);
-	else {
-		/*
-		 * The typical case: arg n is on the stack.
-		 * (Note: stack[0] = return address, so skip it)
-		 */
-		n -= ARRAY_SIZE(arg_offs_table);
-		return regs_get_kernel_stack_nth(regs, 1 + n);
-	}
-}
-
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.

From bbaa46fac6d1c652bfa6282420d36a44bdc53b64 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:47:03 -0500
Subject: [PATCH 043/640] perf probe: Remove newline from die()

Remove newline from die(), because it is automatically added.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20100105224703.19431.42475.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/probe-finder.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4b852c0d16a5..6402798337c8 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -402,11 +402,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
 	} else if (op == DW_OP_regx) {
 		regn = loc->lr_number;
 	} else
-		die("Dwarf_OP %d is not supported.\n", op);
+		die("Dwarf_OP %d is not supported.", op);
 
 	regs = get_arch_regstr(regn);
 	if (!regs)
-		die("%lld exceeds max register number.\n", regn);
+		die("%lld exceeds max register number.", regn);
 
 	if (deref)
 		ret = snprintf(pf->buf, pf->len,
@@ -438,7 +438,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
 	return ;
 error:
 	die("Failed to find the location of %s at this address.\n"
-	    " Perhaps, it has been optimized out.\n", pf->var);
+	    " Perhaps, it has been optimized out.", pf->var);
 }
 
 static int variable_callback(struct die_link *dlink, void *data)
@@ -476,7 +476,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
 	/* Search child die for local variables and parameters. */
 	ret = search_die_from_children(sp_die, variable_callback, pf);
 	if (!ret)
-		die("Failed to find '%s' in this function.\n", pf->var);
+		die("Failed to find '%s' in this function.", pf->var);
 }
 
 /* Get a frame base on the address */
@@ -602,7 +602,7 @@ static void find_by_line(struct probe_finder *pf)
 		ret = search_die_from_children(pf->cu_die,
 					       probeaddr_callback, pf);
 		if (ret == 0)
-			die("Probe point is not found in subprograms.\n");
+			die("Probe point is not found in subprograms.");
 		/* Continuing, because target line might be inlined. */
 	}
 	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
@@ -661,7 +661,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
 				    !die_inlined_subprogram(lk->die))
 					goto found;
 			}
-			die("Failed to find real subprogram.\n");
+			die("Failed to find real subprogram.");
 found:
 			/* Get offset from subprogram */
 			ret = die_within_subprogram(lk->die, pf->addr, &offs);

From 72041334b8c75ae7e1da2f17ba2b7afee8f2abd7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:47:10 -0500
Subject: [PATCH 044/640] perf probe: Show probe list in pager

Show probe list in pager, because the list can be longer than
a page.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20100105224710.19431.61542.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/probe-event.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8e532d9824f0..a22141a773bc 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -37,6 +37,7 @@
 #include "string.h"
 #include "strlist.h"
 #include "debug.h"
+#include "cache.h"
 #include "parse-events.h"  /* For debugfs_path */
 #include "probe-event.h"
 
@@ -455,6 +456,8 @@ void show_perf_probe_events(void)
 	struct strlist *rawlist;
 	struct str_node *ent;
 
+	setup_pager();
+
 	fd = open_kprobe_events(O_RDONLY, 0);
 	rawlist = get_trace_kprobe_event_rawlist(fd);
 	close(fd);

From fb1d2edf7ee25a26ad0b238d0ee335a3b28b7aa3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:47:17 -0500
Subject: [PATCH 045/640] perf tools: Support tracepoint glob matching

Support glob wildcard when selecting tracepoint events by -e
option. Without this patch, perf-tools supports 'GROUP:*:record'
syntax for selecting all tracepoints under GROUP group.

With this patch, user can choose tracepoints more flexibly by using
partial wildcards, e.g. 'block:*bio*:record'.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20100105224717.19431.68972.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 609d5a9470c5..05d0c5c2030c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -450,7 +450,8 @@ parse_single_tracepoint_event(char *sys_name,
 /* sys + ':' + event + ':' + flags*/
 #define MAX_EVOPT_LEN	(MAX_EVENT_LENGTH * 2 + 2 + 128)
 static enum event_result
-parse_subsystem_tracepoint_event(char *sys_name, char *flags)
+parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
+				char *flags)
 {
 	char evt_path[MAXPATHLEN];
 	struct dirent *evt_ent;
@@ -474,6 +475,9 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
 		    || !strcmp(evt_ent->d_name, "filter"))
 			continue;
 
+		if (!strglobmatch(evt_ent->d_name, evt_exp))
+			continue;
+
 		len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
 			       evt_ent->d_name, flags ? ":" : "",
 			       flags ?: "");
@@ -522,9 +526,10 @@ static enum event_result parse_tracepoint_event(const char **strp,
 	if (evt_length >= MAX_EVENT_LENGTH)
 		return EVT_FAILED;
 
-	if (!strcmp(evt_name, "*")) {
+	if (strpbrk(evt_name, "*?")) {
 		*strp = evt_name + evt_length;
-		return parse_subsystem_tracepoint_event(sys_name, flags);
+		return parse_multiple_tracepoint_event(sys_name, evt_name,
+						       flags);
 	} else
 		return parse_single_tracepoint_event(sys_name, evt_name,
 						     evt_length, flags,

From 6964cd2c8efe6e048401f1fe3952a06c563c34c1 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 5 Jan 2010 17:47:24 -0500
Subject: [PATCH 046/640] perf tools: Enhance glob string matching

Enhance strglobmatch() for supporting character classes([CHARS],
complementation and ranges are also supported) and escaped
special characters (\*, \? etc).

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20100105224724.19431.56271.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/string.c | 65 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 5352d7dccc61..c397d4f6f748 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -227,16 +227,73 @@ fail:
 	return NULL;
 }
 
-/* Glob expression pattern matching */
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+	bool complement = false, ret = true;
+
+	if (*pat == '!') {
+		complement = true;
+		pat++;
+	}
+	if (*pat++ == c)	/* First character is special */
+		goto end;
+
+	while (*pat && *pat != ']') {	/* Matching */
+		if (*pat == '-' && *(pat + 1) != ']') {	/* Range */
+			if (*(pat - 1) <= c && c <= *(pat + 1))
+				goto end;
+			if (*(pat - 1) > *(pat + 1))
+				goto error;
+			pat += 2;
+		} else if (*pat++ == c)
+			goto end;
+	}
+	if (!*pat)
+		goto error;
+	ret = false;
+
+end:
+	while (*pat && *pat != ']')	/* Searching closing */
+		pat++;
+	if (!*pat)
+		goto error;
+	*npat = pat + 1;
+	return complement ? !ret : ret;
+
+error:
+	return false;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
 bool strglobmatch(const char *str, const char *pat)
 {
 	while (*str && *pat && *pat != '*') {
-		if (*pat == '?') {
+		if (*pat == '?') {	/* Matches any single character */
 			str++;
 			pat++;
-		} else
-			if (*str++ != *pat++)
+			continue;
+		} else if (*pat == '[')	/* Character classes/Ranges */
+			if (__match_charclass(pat + 1, *str, &pat)) {
+				str++;
+				continue;
+			} else
 				return false;
+		else if (*pat == '\\') /* Escaped char match as normal char */
+			pat++;
+		if (*str++ != *pat++)
+			return false;
 	}
 	/* Check wild card */
 	if (*pat == '*') {

From 631c9def804b2c92b5cca04fb9ff7b5df9e35094 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Wed, 6 Jan 2010 09:45:34 -0500
Subject: [PATCH 047/640] perf probe: Support --line option to show probable
 source-code lines

Add --line option to support showing probable source-code lines.

  perf probe --line SRC:LN[-LN|+NUM]
   or
  perf probe --line FUNC[:LN[-LN|+NUM]]

This option shows source-code with line number if the line can
be probed. Lines without line number (and blue color) means that
the line can not be probed, because debuginfo doesn't have the
information of those lines.

The argument specifies the range of lines, "source.c:100-120"
shows lines between 100th to l20th in source.c file. And
"func:10+20" shows 20 lines from 10th line of func function.

e.g.
 # ./perf probe --line kernel/sched.c:1080
 <kernel/sched.c:1080>
          *
          * called with rq->lock held and irqs disabled
          */
         static void hrtick_start(struct rq *rq, u64 delay)
         {
                struct hrtimer *timer = &rq->hrtick_timer;
   1086         ktime_t time = ktime_add_ns(timer->base->get_time(), delay);

                hrtimer_set_expires(timer, time);

   1090         if (rq == this_rq()) {
   1091                 hrtimer_restart(timer);
   1092         } else if (!rq->hrtick_csd_pending) {
   1093                 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd,
   1094                 rq->hrtick_csd_pending = 1;

If you specifying function name, this shows function-relative
line number.

 # ./perf probe --line schedule
 <schedule:0>
         asmlinkage void __sched schedule(void)
      1  {
                struct task_struct *prev, *next;
                unsigned long *switch_count;
                struct rq *rq;
                int cpu;

         need_resched:
                preempt_disable();
      9         cpu = smp_processor_id();
     10         rq = cpu_rq(cpu);
     11         rcu_sched_qs(cpu);
     12         prev = rq->curr;
     13         switch_count = &prev->nivcsw;

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20100106144534.27218.77939.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-probe.txt |  20 +++
 tools/perf/builtin-probe.c              |  76 ++++++++--
 tools/perf/util/probe-event.c           | 100 +++++++++++++
 tools/perf/util/probe-event.h           |   2 +
 tools/perf/util/probe-finder.c          | 191 +++++++++++++++++++++++-
 tools/perf/util/probe-finder.h          |  31 ++++
 6 files changed, 402 insertions(+), 18 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391b4bc8..2de34075f6a4 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@ or
 'perf probe' [options] --del='[GROUP:]EVENT' [...]
 or
 'perf probe' --list
+or
+'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
 
 DESCRIPTION
 -----------
@@ -45,6 +47,11 @@ OPTIONS
 --list::
 	List up current probe events.
 
+-L::
+--line=::
+	Show source code lines which can be probed. This needs an argument
+	which specifies a range of the source code.
+
 PROBE SYNTAX
 ------------
 Probe points are defined by following syntax.
@@ -56,6 +63,19 @@ Probe points are defined by following syntax.
 It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
 'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
 
+LINE SYNTAX
+-----------
+Line range is descripted by following syntax.
+
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As same as
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'.
+So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
+
 SEE ALSO
 --------
 linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index ffdd3fe87b4a..1d3a99ea5ce1 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,11 +55,13 @@ static struct {
 	bool need_dwarf;
 	bool list_events;
 	bool force_add;
+	bool show_lines;
 	int nr_probe;
 	struct probe_point probes[MAX_PROBES];
 	struct strlist *dellist;
 	struct perf_session *psession;
 	struct map *kmap;
+	struct line_range line_range;
 } session;
 
 
@@ -116,6 +118,15 @@ static int opt_del_probe_event(const struct option *opt __used,
 	return 0;
 }
 
+static int opt_show_lines(const struct option *opt __used,
+			  const char *str, int unset __used)
+{
+	if (str)
+		parse_line_range_desc(str, &session.line_range);
+	INIT_LIST_HEAD(&session.line_range.line_list);
+	session.show_lines = true;
+	return 0;
+}
 /* Currently just checking function name from symbol map */
 static void evaluate_probe_point(struct probe_point *pp)
 {
@@ -144,6 +155,7 @@ static const char * const probe_usage[] = {
 	"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
 	"perf probe [<options>] --del '[GROUP:]EVENT' ...",
 	"perf probe --list",
+	"perf probe --line 'LINEDESC'",
 	NULL
 };
 
@@ -182,9 +194,32 @@ static const struct option options[] = {
 		opt_add_probe_event),
 	OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
 		    " with existing name"),
+#ifndef NO_LIBDWARF
+	OPT_CALLBACK('L', "line", NULL,
+		     "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
+		     "Show source code lines.", opt_show_lines),
+#endif
 	OPT_END()
 };
 
+/* Initialize symbol maps for vmlinux */
+static void init_vmlinux(void)
+{
+	symbol_conf.sort_by_name = true;
+	if (symbol_conf.vmlinux_name == NULL)
+		symbol_conf.try_vmlinux_path = true;
+	else
+		pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+	if (symbol__init() < 0)
+		die("Failed to init symbol map.");
+	session.psession = perf_session__new(NULL, O_WRONLY, false);
+	if (session.psession == NULL)
+		die("Failed to init perf_session.");
+	session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
+	if (!session.kmap)
+		die("Could not find kernel map.\n");
+}
+
 int cmd_probe(int argc, const char **argv, const char *prefix __used)
 {
 	int i, ret;
@@ -203,7 +238,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 		parse_probe_event_argv(argc, argv);
 	}
 
-	if ((!session.nr_probe && !session.dellist && !session.list_events))
+	if ((!session.nr_probe && !session.dellist && !session.list_events &&
+	     !session.show_lines))
 		usage_with_options(probe_usage, options);
 
 	if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +251,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 				   " --add/--del.\n");
 			usage_with_options(probe_usage, options);
 		}
+		if (session.show_lines) {
+			pr_warning("  Error: Don't use --list with --line.\n");
+			usage_with_options(probe_usage, options);
+		}
 		show_perf_probe_events();
 		return 0;
 	}
 
+#ifndef NO_LIBDWARF
+	if (session.show_lines) {
+		if (session.nr_probe != 0 || session.dellist) {
+			pr_warning("  Error: Don't use --line with"
+				   " --add/--del.\n");
+			usage_with_options(probe_usage, options);
+		}
+		init_vmlinux();
+		fd = open_vmlinux();
+		if (fd < 0)
+			die("Could not open debuginfo file.");
+		ret = find_line_range(fd, &session.line_range);
+		if (ret <= 0)
+			die("Source line is not found.\n");
+		close(fd);
+		show_line_range(&session.line_range);
+		return 0;
+	}
+#endif
+
 	if (session.dellist) {
 		del_trace_kprobe_events(session.dellist);
 		strlist__delete(session.dellist);
@@ -226,18 +286,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 			return 0;
 	}
 
-	/* Initialize symbol maps for vmlinux */
-	symbol_conf.sort_by_name = true;
-	if (symbol_conf.vmlinux_name == NULL)
-		symbol_conf.try_vmlinux_path = true;
-	if (symbol__init() < 0)
-		die("Failed to init symbol map.");
-	session.psession = perf_session__new(NULL, O_WRONLY, false);
-	if (session.psession == NULL)
-		die("Failed to init perf_session.");
-	session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
-	if (!session.kmap)
-		die("Could not find kernel map.\n");
+	/* Add probes */
+	init_vmlinux();
 
 	if (session.need_dwarf)
 #ifdef NO_LIBDWARF
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a22141a773bc..71b0dd590a37 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -38,6 +38,7 @@
 #include "strlist.h"
 #include "debug.h"
 #include "cache.h"
+#include "color.h"
 #include "parse-events.h"  /* For debugfs_path */
 #include "probe-event.h"
 
@@ -63,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
 	return ret;
 }
 
+void parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+	const char *ptr;
+	char *tmp;
+	/*
+	 * <Syntax>
+	 * SRC:SLN[+NUM|-ELN]
+	 * FUNC[:SLN[+NUM|-ELN]]
+	 */
+	ptr = strchr(arg, ':');
+	if (ptr) {
+		lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
+		if (*tmp == '+')
+			lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
+								    &tmp, 0);
+		else if (*tmp == '-')
+			lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
+		else
+			lr->end = 0;
+		pr_debug("Line range is %u to %u\n", lr->start, lr->end);
+		if (lr->end && lr->start > lr->end)
+			semantic_error("Start line must be smaller"
+				       " than end line.");
+		if (*tmp != '\0')
+			semantic_error("Tailing with invalid character '%d'.",
+				       *tmp);
+		tmp = strndup(arg, (ptr - arg));
+	} else
+		tmp = strdup(arg);
+
+	if (strchr(tmp, '.'))
+		lr->file = tmp;
+	else
+		lr->function = tmp;
+}
+
 /* Check the name is good for event/group */
 static bool check_event_name(const char *name)
 {
@@ -678,3 +715,66 @@ void del_trace_kprobe_events(struct strlist *dellist)
 	close(fd);
 }
 
+#define LINEBUF_SIZE 256
+
+static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
+{
+	char buf[LINEBUF_SIZE];
+	const char *color = PERF_COLOR_BLUE;
+
+	if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+		goto error;
+	if (!skip) {
+		if (show_num)
+			fprintf(stdout, "%7u  %s", l, buf);
+		else
+			color_fprintf(stdout, color, "         %s", buf);
+	}
+
+	while (strlen(buf) == LINEBUF_SIZE - 1 &&
+	       buf[LINEBUF_SIZE - 2] != '\n') {
+		if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+			goto error;
+		if (!skip) {
+			if (show_num)
+				fprintf(stdout, "%s", buf);
+			else
+				color_fprintf(stdout, color, "%s", buf);
+		}
+	}
+	return;
+error:
+	if (feof(fp))
+		die("Source file is shorter than expected.");
+	else
+		die("File read error: %s", strerror(errno));
+}
+
+void show_line_range(struct line_range *lr)
+{
+	unsigned int l = 1;
+	struct line_node *ln;
+	FILE *fp;
+
+	setup_pager();
+
+	if (lr->function)
+		fprintf(stdout, "<%s:%d>\n", lr->function,
+			lr->start - lr->offset);
+	else
+		fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+
+	fp = fopen(lr->path, "r");
+	if (fp == NULL)
+		die("Failed to open %s: %s", lr->path, strerror(errno));
+	/* Skip to starting line number */
+	while (l < lr->start)
+		show_one_line(fp, l++, true, false);
+
+	list_for_each_entry(ln, &lr->line_list, list) {
+		while (ln->line > l)
+			show_one_line(fp, (l++) - lr->offset, false, false);
+		show_one_line(fp, (l++) - lr->offset, false, true);
+	}
+	fclose(fp);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499118c0..711287d4baea 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
 #include "probe-finder.h"
 #include "strlist.h"
 
+extern void parse_line_range_desc(const char *arg, struct line_range *lr);
 extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
 				   bool *need_dwarf);
 extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes,
 				    bool force_add);
 extern void del_trace_kprobe_events(struct strlist *dellist);
 extern void show_perf_probe_events(void);
+extern void show_line_range(struct line_range *lr);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX	1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 6402798337c8..1b2124d12f68 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
 	return found;
 }
 
+static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+{
+	Dwarf_Signed cnt, i;
+	char **srcs;
+	int ret = 0;
+
+	if (!buf || !fno)
+		return -EINVAL;
+
+	ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+	if (ret == DW_DLV_OK) {
+		if ((Dwarf_Unsigned)cnt > fno - 1) {
+			*buf = strdup(srcs[fno - 1]);
+			ret = 0;
+			pr_debug("found filename: %s\n", *buf);
+		} else
+			ret = -ENOENT;
+		for (i = 0; i < cnt; i++)
+			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+		dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+	} else
+		ret = -EINVAL;
+	return ret;
+}
+
 /* Compare diename and tname */
 static int die_compare_name(Dwarf_Die dw_die, const char *tname)
 {
@@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data)
 }
 
 /* Find probe point from its line number */
-static void find_by_line(struct probe_finder *pf)
+static void find_probe_point_by_line(struct probe_finder *pf)
 {
 	Dwarf_Signed cnt, i, clm;
 	Dwarf_Line *lines;
@@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
 				pf->fno = die_get_decl_file(dlink->die);
 				pf->lno = die_get_decl_line(dlink->die)
 					 + pp->line;
-				find_by_line(pf);
+				find_probe_point_by_line(pf);
 				return 1;
 			}
 			if (die_inlined_subprogram(dlink->die)) {
@@ -673,7 +698,7 @@ found:
 	return 0;
 }
 
-static void find_by_func(struct probe_finder *pf)
+static void find_probe_point_by_func(struct probe_finder *pf)
 {
 	search_die_from_children(pf->cu_die, probefunc_callback, pf);
 }
@@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp)
 			if (ret == DW_DLV_NO_ENTRY)
 				pf.cu_base = 0;
 			if (pp->function)
-				find_by_func(&pf);
+				find_probe_point_by_func(&pf);
 			else {
 				pf.lno = pp->line;
-				find_by_line(&pf);
+				find_probe_point_by_line(&pf);
 			}
 		}
 		dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp)
 	return pp->found;
 }
 
+
+static void line_range_add_line(struct line_range *lr, unsigned int line)
+{
+	struct line_node *ln;
+	struct list_head *p;
+
+	/* Reverse search, because new line will be the last one */
+	list_for_each_entry_reverse(ln, &lr->line_list, list) {
+		if (ln->line < line) {
+			p = &ln->list;
+			goto found;
+		} else if (ln->line == line)	/* Already exist */
+			return ;
+	}
+	/* List is empty, or the smallest entry */
+	p = &lr->line_list;
+found:
+	pr_debug("Debug: add a line %u\n", line);
+	ln = zalloc(sizeof(struct line_node));
+	DIE_IF(ln == NULL);
+	ln->line = line;
+	INIT_LIST_HEAD(&ln->list);
+	list_add(&ln->list, p);
+}
+
+/* Find line range from its line number */
+static void find_line_range_by_line(struct line_finder *lf)
+{
+	Dwarf_Signed cnt, i;
+	Dwarf_Line *lines;
+	Dwarf_Unsigned lineno = 0;
+	Dwarf_Unsigned fno;
+	Dwarf_Addr addr;
+	int ret;
+
+	ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+
+	for (i = 0; i < cnt; i++) {
+		ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		if (fno != lf->fno)
+			continue;
+
+		ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+		if (lf->lno_s > lineno || lf->lno_e < lineno)
+			continue;
+
+		/* Filter line in the function address range */
+		if (lf->addr_s && lf->addr_e) {
+			ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+			DIE_IF(ret != DW_DLV_OK);
+			if (lf->addr_s > addr || lf->addr_e <= addr)
+				continue;
+		}
+		line_range_add_line(lf->lr, (unsigned int)lineno);
+	}
+	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+	if (!list_empty(&lf->lr->line_list))
+		lf->found = 1;
+}
+
+/* Search function from function name */
+static int linefunc_callback(struct die_link *dlink, void *data)
+{
+	struct line_finder *lf = (struct line_finder *)data;
+	struct line_range *lr = lf->lr;
+	Dwarf_Half tag;
+	int ret;
+
+	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+	DIE_IF(ret == DW_DLV_ERROR);
+	if (tag == DW_TAG_subprogram &&
+	    die_compare_name(dlink->die, lr->function) == 0) {
+		/* Get the address range of this function */
+		ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
+		if (ret == DW_DLV_OK)
+			ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
+		DIE_IF(ret == DW_DLV_ERROR);
+		if (ret == DW_DLV_NO_ENTRY) {
+			lf->addr_s = 0;
+			lf->addr_e = 0;
+		}
+
+		lf->fno = die_get_decl_file(dlink->die);
+		lr->offset = die_get_decl_line(dlink->die);;
+		lf->lno_s = lr->offset + lr->start;
+		if (!lr->end)
+			lf->lno_e = (Dwarf_Unsigned)-1;
+		else
+			lf->lno_e = lr->offset + lr->end;
+		lr->start = lf->lno_s;
+		lr->end = lf->lno_e;
+		find_line_range_by_line(lf);
+		/* If we find a target function, this should be end. */
+		lf->found = 1;
+		return 1;
+	}
+	return 0;
+}
+
+static void find_line_range_by_func(struct line_finder *lf)
+{
+	search_die_from_children(lf->cu_die, linefunc_callback, lf);
+}
+
+int find_line_range(int fd, struct line_range *lr)
+{
+	Dwarf_Half addr_size = 0;
+	Dwarf_Unsigned next_cuh = 0;
+	int ret;
+	struct line_finder lf = {.lr = lr};
+
+	ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+	if (ret != DW_DLV_OK)
+		return -ENOENT;
+
+	while (!lf.found) {
+		/* Search CU (Compilation Unit) */
+		ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+			&addr_size, &next_cuh, &__dw_error);
+		DIE_IF(ret == DW_DLV_ERROR);
+		if (ret == DW_DLV_NO_ENTRY)
+			break;
+
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
+		DIE_IF(ret != DW_DLV_OK);
+
+		/* Check if target file is included. */
+		if (lr->file)
+			lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+
+		if (!lr->file || lf.fno) {
+			if (lr->function)
+				find_line_range_by_func(&lf);
+			else {
+				lf.lno_s = lr->start;
+				if (!lr->end)
+					lf.lno_e = (Dwarf_Unsigned)-1;
+				else
+					lf.lno_e = lr->end;
+				find_line_range_by_line(&lf);
+			}
+			/* Get the real file path */
+			if (lf.found)
+				cu_get_filename(lf.cu_die, lf.fno, &lr->path);
+		}
+		dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+	}
+	ret = dwarf_finish(__dw_debug, &__dw_error);
+	DIE_IF(ret != DW_DLV_OK);
+	return lf.found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index e3f396806e6e..972b386116f1 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,8 +34,26 @@ struct probe_point {
 	char			*probes[MAX_PROBES];	/* Output buffers (will be allocated)*/
 };
 
+/* Line number container */
+struct line_node {
+	struct list_head	list;
+	unsigned int		line;
+};
+
+/* Line range */
+struct line_range {
+	char			*file;			/* File name */
+	char			*function;		/* Function name */
+	unsigned int		start;			/* Start line number */
+	unsigned int		end;			/* End line number */
+	unsigned int		offset;			/* Start line offset */
+	char			*path;			/* Real path name */
+	struct list_head	line_list;		/* Visible lines */
+};
+
 #ifndef NO_LIBDWARF
 extern int find_probepoint(int fd, struct probe_point *pp);
+extern int find_line_range(int fd, struct line_range *lr);
 
 /* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
 #ifndef _MIPS_SZLONG
@@ -62,6 +80,19 @@ struct probe_finder {
 	char			*buf;			/* Current output buffer */
 	int			len;			/* Length of output buffer */
 };
+
+struct line_finder {
+	struct line_range	*lr;			/* Target line range */
+
+	Dwarf_Unsigned		fno;			/* File number */
+	Dwarf_Unsigned		lno_s;			/* Start line number */
+	Dwarf_Unsigned		lno_e;			/* End line number */
+	Dwarf_Addr		addr_s;			/* Start address */
+	Dwarf_Addr		addr_e;			/* End address */
+	Dwarf_Die		cu_die;			/* Current CU */
+	int			found;
+};
+
 #endif /* NO_LIBDWARF */
 
 #endif /*_PROBE_FINDER_H */

From 8d9e503928638fc95317be42c416fb7907322aff Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Thu, 7 Jan 2010 19:40:47 +0300
Subject: [PATCH 048/640] perf: Fix memory leak: counterwidth

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Cc: a.p.zijlstra@chello.nl
Cc: paulus@samba.org
LKML-Reference: <1262882447-23776-2-git-send-email-a.beregalov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/values.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index 1c15e39f99e3..cfa55d686e3b 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -169,6 +169,7 @@ static void perf_read_values__display_pretty(FILE *fp,
 				counterwidth[j], values->value[i][j]);
 		fprintf(fp, "\n");
 	}
+	free(counterwidth);
 }
 
 static void perf_read_values__display_raw(FILE *fp,

From fed5af61dc0d9402d26e7fb8fb9731a60a8e05ca Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jan 2010 19:59:38 -0200
Subject: [PATCH 049/640] perf buildid-list: No need to process the header
 sections again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As it is already processed by:

	perf_session__new
		perf_session__open
			perf_session__read

This was harmless, because we use dsos__findnew, that would
already find it, but is unnecessary work and removing it makes
builtin-buildid-list.c even shorter.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262901583-8074-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-buildid-list.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 1e99ac806913..4229c2c213cc 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -31,26 +31,6 @@ static const struct option options[] = {
 	OPT_END()
 };
 
-static int perf_file_section__process_buildids(struct perf_file_section *self,
-					       int feat, int fd)
-{
-	if (feat != HEADER_BUILD_ID)
-		return 0;
-
-	if (lseek(fd, self->offset, SEEK_SET) < 0) {
-		pr_warning("Failed to lseek to %Ld offset for buildids!\n",
-			   self->offset);
-		return -1;
-	}
-
-	if (perf_header__read_build_ids(fd, self->offset, self->size)) {
-		pr_warning("Failed to read buildids!\n");
-		return -1;
-	}
-
-	return 0;
-}
-
 static int __cmd_buildid_list(void)
 {
 	int err = -1;
@@ -60,10 +40,7 @@ static int __cmd_buildid_list(void)
 	if (session == NULL)
 		return -1;
 
-	err = perf_header__process_sections(&session->header, session->fd,
-				         perf_file_section__process_buildids);
-	if (err >= 0)
-		dsos__fprintf_buildid(stdout);
+	dsos__fprintf_buildid(stdout);
 
 	perf_session__delete(session);
 	return err;

From a89e5abe3efcc7facc666d3985769278937f86b0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jan 2010 19:59:39 -0200
Subject: [PATCH 050/640] perf symbols: Record the domain of DSOs in
 HEADER_BUILD_ID header table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So that we can restore them to the right DSO list (either
dsos__kernel or dsos__user).

We do that just like the kernel does for the other events,
encoding PERF_RECORD_MISC_{KERNEL,USER} in perf_event_header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262901583-8074-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/header.c  |  9 ++++++---
 tools/perf/util/session.c |  6 +++++-
 tools/perf/util/symbol.c  |  6 +++---
 tools/perf/util/symbol.h  | 11 +++++++++--
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 942f7da8bf84..ec96321eb9e4 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -193,7 +193,7 @@ static int write_padded(int fd, const void *bf, size_t count,
 			continue;		\
 		else
 
-static int __dsos__write_buildid_table(struct list_head *head, int fd)
+static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
 {
 	struct dso *pos;
 
@@ -205,6 +205,7 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
 		len = ALIGN(len, NAME_ALIGN);
 		memset(&b, 0, sizeof(b));
 		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+		b.header.misc = misc;
 		b.header.size = sizeof(b) + len;
 		err = do_write(fd, &b, sizeof(b));
 		if (err < 0)
@@ -220,9 +221,11 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
 
 static int dsos__write_buildid_table(int fd)
 {
-	int err = __dsos__write_buildid_table(&dsos__kernel, fd);
+	int err = __dsos__write_buildid_table(&dsos__kernel,
+					      PERF_RECORD_MISC_KERNEL, fd);
 	if (err == 0)
-		err = __dsos__write_buildid_table(&dsos__user, fd);
+		err = __dsos__write_buildid_table(&dsos__user,
+						  PERF_RECORD_MISC_USER, fd);
 	return err;
 }
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e0e6a075489e..378ac5422bcf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -255,6 +255,7 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size)
 	while (offset < limit) {
 		struct dso *dso;
 		ssize_t len;
+		struct list_head *head = &dsos__user;
 
 		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
 			goto out;
@@ -263,7 +264,10 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size)
 		if (read(input, filename, len) != len)
 			goto out;
 
-		dso = dsos__findnew(filename);
+		if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
+			head = &dsos__kernel;
+
+		dso = __dsos__findnew(head, filename);
 		if (dso != NULL)
 			dso__set_build_id(dso, &bev.build_id);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index da2f07f1af8f..8e6627e6b778 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1615,14 +1615,14 @@ static struct dso *dsos__find(struct list_head *head, const char *name)
 	return NULL;
 }
 
-struct dso *dsos__findnew(const char *name)
+struct dso *__dsos__findnew(struct list_head *head, const char *name)
 {
-	struct dso *dso = dsos__find(&dsos__user, name);
+	struct dso *dso = dsos__find(head, name);
 
 	if (!dso) {
 		dso = dso__new(name);
 		if (dso != NULL) {
-			dsos__add(&dsos__user, dso);
+			dsos__add(head, dso);
 			dso__set_basename(dso);
 		}
 	}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index b2b5330a82a0..ee0b4593db7b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -115,9 +115,17 @@ bool dso__sorted_by_name(const struct dso *self, enum map_type type);
 
 void dso__sort_by_name(struct dso *self, enum map_type type);
 
+extern struct list_head dsos__user, dsos__kernel;
+
+struct dso *__dsos__findnew(struct list_head *head, const char *name);
+
+static inline struct dso *dsos__findnew(const char *name)
+{
+	return __dsos__findnew(&dsos__user, name);
+}
+
 struct perf_session;
 
-struct dso *dsos__findnew(const char *name);
 int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 	      symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
@@ -143,6 +151,5 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
 int perf_session__create_kernel_maps(struct perf_session *self);
 
-extern struct list_head dsos__user, dsos__kernel;
 extern struct dso *vdso;
 #endif /* __PERF_SYMBOL */

From cf5531148ff34938840d6da775c0a4ace442d573 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jan 2010 19:59:40 -0200
Subject: [PATCH 051/640] perf tools: Create typedef for common event
 synthesizing callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1262901583-8074-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c | 16 +++++-----------
 tools/perf/util/event.h | 12 +++++-------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 1a31feb9999f..bfb3d872b9f5 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,8 +8,7 @@
 #include "thread.h"
 
 static pid_t event__synthesize_comm(pid_t pid, int full,
-				    int (*process)(event_t *event,
-						   struct perf_session *session),
+				    event__handler_t process,
 				    struct perf_session *session)
 {
 	event_t ev;
@@ -91,8 +90,7 @@ out_failure:
 }
 
 static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
-					 int (*process)(event_t *event,
-							struct perf_session *session),
+					 event__handler_t process,
 					 struct perf_session *session)
 {
 	char filename[PATH_MAX];
@@ -156,9 +154,7 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
 	return 0;
 }
 
-int event__synthesize_thread(pid_t pid,
-			     int (*process)(event_t *event,
-					    struct perf_session *session),
+int event__synthesize_thread(pid_t pid, event__handler_t process,
 			     struct perf_session *session)
 {
 	pid_t tgid = event__synthesize_comm(pid, 1, process, session);
@@ -167,8 +163,7 @@ int event__synthesize_thread(pid_t pid,
 	return event__synthesize_mmap_events(pid, tgid, process, session);
 }
 
-void event__synthesize_threads(int (*process)(event_t *event,
-					      struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
 			       struct perf_session *session)
 {
 	DIR *proc;
@@ -205,8 +200,7 @@ static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
 	return 1;
 }
 
-int event__synthesize_kernel_mmap(int (*process)(event_t *event,
-						 struct perf_session *session),
+int event__synthesize_kernel_mmap(event__handler_t process,
 				  struct perf_session *session,
 				  const char *symbol_name)
 {
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 61fc0dc658c2..80356da8216c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -103,15 +103,13 @@ void event__print_totals(void);
 
 struct perf_session;
 
-int event__synthesize_thread(pid_t pid,
-			     int (*process)(event_t *event,
-					    struct perf_session *session),
+typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
 			     struct perf_session *session);
-void event__synthesize_threads(int (*process)(event_t *event,
-					      struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
 			       struct perf_session *session);
-int event__synthesize_kernel_mmap(int (*process)(event_t *event,
-						 struct perf_session *session),
+int event__synthesize_kernel_mmap(event__handler_t process,
 				  struct perf_session *session,
 				  const char *symbol_name);
 

From 8381f65d097dad90416808314737dd7d3ae38ea9 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie.iles@picochip.com>
Date: Fri, 8 Jan 2010 15:27:33 +0000
Subject: [PATCH 052/640] sched/perf: Make sure irqs are disabled for
 perf_event_task_sched_in()

perf_event_task_sched_in() expects interrupts to be disabled,
but on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW
defined, this isn't true. If this is defined, disable irqs
around the call in finish_task_switch().

Signed-off-by: Jamie Iles <jamie.iles@picochip.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
LKML-Reference: <1262964453-27370-1-git-send-email-jamie.iles@picochip.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/sched.c b/kernel/sched.c
index e507af086b42..c3ad3427a2a5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2783,7 +2783,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_disable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	perf_event_task_sched_in(current);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
 
 	fire_sched_in_preempt_notifiers(current);

From ff314d3903c2843de65c2148f66f277f2440ed26 Mon Sep 17 00:00:00 2001
From: Wenji Huang <wenji.huang@oracle.com>
Date: Wed, 13 Jan 2010 17:01:38 +0800
Subject: [PATCH 053/640] perf: Make cmd_to_page() function more compact

Remove branch for is_perf_command.

Signed-off-by: Wenji Huang <wenji.huang@oracle.com>
Cc: fweisbec@gmail.com
Cc: jkacur@redhat.com
Cc: acme@redhat.com
LKML-Reference: <1263373298-13282-1-git-send-email-wenji.huang@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-help.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index e427d6965e0c..215b584007b1 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -313,8 +313,6 @@ static const char *cmd_to_page(const char *perf_cmd)
 		return "perf";
 	else if (!prefixcmp(perf_cmd, "perf"))
 		return perf_cmd;
-	else if (is_perf_command(perf_cmd))
-		return prepend("perf-", perf_cmd);
 	else
 		return prepend("perf-", perf_cmd);
 }

From 292e0041c3b22c5347092152504d814119554b57 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 9 Dec 2009 06:56:40 -0500
Subject: [PATCH 054/640] [CPUFREQ] fix default value for ondemand governor

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 Documentation/cpu-freq/governors.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index aed082f49d09..737988fca64d 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -145,8 +145,8 @@ show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT.
 up_threshold: defines what the average CPU usage between the samplings
 of 'sampling_rate' needs to be for the kernel to make a decision on
 whether it should increase the frequency.  For example when it is set
-to its default value of '80' it means that between the checking
-intervals the CPU needs to be on average more than 80% in use to then
+to its default value of '95' it means that between the checking
+intervals the CPU needs to be on average more than 95% in use to then
 decide that the CPU frequency needs to be increased.  
 
 ignore_nice_load: this parameter takes a value of '0' or '1'. When

From 557a701c16553b0b691dbb64ef30361115a80f64 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Mon, 14 Dec 2009 11:44:15 +0100
Subject: [PATCH 055/640] [CPUFREQ] Fix use after free of struct
 powernow_k8_data

Easy fix for a regression introduced in 2.6.31.

On managed CPUs the cpufreq.c core will call driver->exit(cpu) on the
managed cpus and powernow_k8 will free the core's data.

Later driver->get(cpu) function might get called trying to read out the
current freq of a managed cpu and the NULL pointer check does not work on
the freed object -> better set it to NULL.

->get() is unsigned and must return 0 as invalid frequency.

Reference:
http://bugzilla.kernel.org/show_bug.cgi?id=14391

Signed-off-by: Thomas Renninger <trenn@suse.de>
Tested-by: Michal Schmidt <mschmidt@redhat.com>
CC: stable@kernel.org
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a9df9441a9a2..2da4fa3bf6e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
 	kfree(data->powernow_table);
 	kfree(data);
+	per_cpu(powernow_data, pol->cpu) = NULL;
 
 	return 0;
 }
@@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
 	int err;
 
 	if (!data)
-		return -EINVAL;
+		return 0;
 
 	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
 	if (err)

From 1dbf58881f307e21a3df4b990a5bea401360d02e Mon Sep 17 00:00:00 2001
From: "Nagananda.Chumbalkar@hp.com" <Nagananda.Chumbalkar@hp.com>
Date: Mon, 21 Dec 2009 23:40:52 +0100
Subject: [PATCH 056/640] [CPUFREQ] Fix ondemand to not request targets outside
 policy limits

Dominik said:
target_freq cannot be below policy->min or above policy->max.
If it were, the whole cpufreq subsystem is broken.

But (answer):
I think the "ondemand" governor can ask for a target frequency that is
below policy->min.
...
A patch such as below may be needed to sanitize the target frequency
requested by "ondemand". The "conservative" governor already has this check:

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dave Jones <davej@redhat.com>

# diff -bur x/drivers/cpufreq/cpufreq_ondemand.c.orig y/drivers/cpufreq/cpufreq_ondemand.c
---
 drivers/cpufreq/cpufreq_ondemand.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 4b34ade2332b..bd444dc93cf2 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -554,6 +554,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 				(dbs_tuners_ins.up_threshold -
 				 dbs_tuners_ins.down_differential);
 
+		if (freq_next < policy->min)
+			freq_next = policy->min;
+
 		if (!dbs_tuners_ins.powersave_bias) {
 			__cpufreq_driver_target(policy, freq_next,
 					CPUFREQ_RELATION_L);

From b7cece76783c68fb391f9882235b4b0c9c300c46 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 13 Jan 2010 13:22:17 -0200
Subject: [PATCH 057/640] perf tools: Encode kernel module mappings in
 perf.data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)

Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).

One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c   |   5 ++
 tools/perf/builtin-record.c |  11 ++++
 tools/perf/builtin-top.c    |   5 ++
 tools/perf/util/event.c     | 106 ++++++++++++++++++++++++++++----
 tools/perf/util/event.h     |   2 +
 tools/perf/util/session.c   |   8 +--
 tools/perf/util/symbol.c    | 116 ++++++++++++++++++++++++++----------
 tools/perf/util/symbol.h    |   3 +
 tools/perf/util/thread.h    |   4 ++
 9 files changed, 212 insertions(+), 48 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 88c570c18e3e..4af7199c5af7 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -494,6 +494,11 @@ static int __cmd_kmem(void)
 	if (!perf_session__has_traces(session, "kmem record"))
 		goto out_delete;
 
+	if (perf_session__create_kernel_maps(session) < 0) {
+		pr_err("Problems creating kernel maps\n");
+		return -1;
+	}
+
 	setup_pager();
 	err = perf_session__process_events(session, &event_ops);
 	if (err != 0)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8f88420e066b..c130df2676f1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -465,6 +465,11 @@ static int __cmd_record(int argc, const char **argv)
 		return -1;
 	}
 
+	if (perf_session__create_kernel_maps(session) < 0) {
+		pr_err("Problems creating kernel maps\n");
+		return -1;
+	}
+
 	if (!file_new) {
 		err = perf_header__read(&session->header, output);
 		if (err < 0)
@@ -558,6 +563,12 @@ static int __cmd_record(int argc, const char **argv)
 		return err;
 	}
 
+	err = event__synthesize_modules(process_synthesized_event, session);
+	if (err < 0) {
+		pr_err("Couldn't record kernel reference relocation symbol.\n");
+		return err;
+	}
+
 	if (!system_wide && profile_cpu == -1)
 		event__synthesize_thread(pid, process_synthesized_event,
 					 session);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ddc584b64871..6822b44ca4f9 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1165,6 +1165,11 @@ static int __cmd_top(void)
 	if (session == NULL)
 		return -ENOMEM;
 
+	if (perf_session__create_kernel_maps(session) < 0) {
+		pr_err("Problems creating kernel maps\n");
+		return -1;
+	}
+
 	if (target_pid != -1)
 		event__synthesize_thread(target_pid, event__process, session);
 	else
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bfb3d872b9f5..4f3e7ef33b83 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -154,6 +154,36 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
 	return 0;
 }
 
+int event__synthesize_modules(event__handler_t process,
+			      struct perf_session *session)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]);
+	     nd; nd = rb_next(nd)) {
+		event_t ev;
+		size_t size;
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+
+		if (pos->dso->kernel)
+			continue;
+
+		size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+		memset(&ev, 0, sizeof(ev));
+		ev.mmap.header.type = PERF_RECORD_MMAP;
+		ev.mmap.header.size = (sizeof(ev.mmap) -
+				        (sizeof(ev.mmap.filename) - size));
+		ev.mmap.start = pos->start;
+		ev.mmap.len   = pos->end - pos->start;
+
+		memcpy(ev.mmap.filename, pos->dso->long_name,
+		       pos->dso->long_name_len + 1);
+		process(&ev, session);
+	}
+
+	return 0;
+}
+
 int event__synthesize_thread(pid_t pid, event__handler_t process,
 			     struct perf_session *session)
 {
@@ -222,7 +252,9 @@ int event__synthesize_kernel_mmap(event__handler_t process,
 			"[kernel.kallsyms.%s]", symbol_name) + 1;
 	size = ALIGN(size, sizeof(u64));
 	ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size));
-	ev.mmap.start = args.start;
+	ev.mmap.pgoff = args.start;
+	ev.mmap.start = session->vmlinux_maps[MAP__FUNCTION]->start;
+	ev.mmap.len   = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start ;
 
 	return process(&ev, session);
 }
@@ -280,7 +312,6 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 {
 	struct thread *thread;
 	struct map *map;
-	static const char kmmap_prefix[] = "[kernel.kallsyms.";
 
 	dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
 		    self->mmap.pid, self->mmap.tid,
@@ -289,13 +320,61 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 		    (void *)(long)self->mmap.pgoff,
 		    self->mmap.filename);
 
-	if (self->mmap.pid == 0 &&
-	    memcmp(self->mmap.filename, kmmap_prefix,
-		   sizeof(kmmap_prefix) - 1) == 0) {
-		const char *symbol_name = (self->mmap.filename +
-					   sizeof(kmmap_prefix) - 1);
-		perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
-							 self->mmap.start);
+	if (self->mmap.pid == 0) {
+		static const char kmmap_prefix[] = "[kernel.kallsyms.";
+
+		if (self->mmap.filename[0] == '/') {
+			char short_module_name[1024];
+			char *name = strrchr(self->mmap.filename, '/'), *dot;
+
+			if (name == NULL)
+				goto out_problem;
+
+			++name; /* skip / */
+			dot = strrchr(name, '.');
+			if (dot == NULL)
+				goto out_problem;
+
+			snprintf(short_module_name, sizeof(short_module_name),
+				 "[%.*s]", (int)(dot - name), name);
+			strxfrchar(short_module_name, '-', '_');
+
+			map = perf_session__new_module_map(session,
+							   self->mmap.start,
+							   short_module_name);
+			if (map == NULL)
+				goto out_problem;
+
+			name = strdup(self->mmap.filename);
+			if (name == NULL)
+				goto out_problem;
+
+			dso__set_long_name(map->dso, name);
+			map->end = map->start + self->mmap.len;
+		} else if (memcmp(self->mmap.filename, kmmap_prefix,
+				sizeof(kmmap_prefix) - 1) == 0) {
+			const char *symbol_name = (self->mmap.filename +
+						   sizeof(kmmap_prefix) - 1);
+			/*
+			 * Should be there already, from the build-id table in
+			 * the header.
+			 */
+			struct dso *kernel = __dsos__findnew(&dsos__kernel,
+							     "[kernel.kallsyms]");
+			if (kernel == NULL)
+				goto out_problem;
+
+			if (__map_groups__create_kernel_maps(&session->kmaps,
+							     session->vmlinux_maps,
+							     kernel) < 0)
+				goto out_problem;
+
+			session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
+			session->vmlinux_maps[MAP__FUNCTION]->end   = self->mmap.start + self->mmap.len;
+
+			perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
+								 self->mmap.pgoff);
+		}
 		return 0;
 	}
 
@@ -304,10 +383,13 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 		       session->cwd, session->cwdlen);
 
 	if (thread == NULL || map == NULL)
-		dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
-	else
-		thread__insert_map(thread, map);
+		goto out_problem;
 
+	thread__insert_map(thread, map);
+	return 0;
+
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
 	return 0;
 }
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 80356da8216c..50a7132887f5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -112,6 +112,8 @@ void event__synthesize_threads(event__handler_t process,
 int event__synthesize_kernel_mmap(event__handler_t process,
 				  struct perf_session *session,
 				  const char *symbol_name);
+int event__synthesize_modules(event__handler_t process,
+			      struct perf_session *session);
 
 int event__process_comm(event_t *self, struct perf_session *session);
 int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 378ac5422bcf..fd1c5a39a5bb 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -69,9 +69,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->unknown_events = 0;
 	map_groups__init(&self->kmaps);
 
-	if (perf_session__create_kernel_maps(self) < 0)
-		goto out_delete;
-
 	if (mode == O_RDONLY && perf_session__open(self, force) < 0)
 		goto out_delete;
 
@@ -268,8 +265,11 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size)
 			head = &dsos__kernel;
 
 		dso = __dsos__findnew(head, filename);
-		if (dso != NULL)
+		if (dso != NULL) {
 			dso__set_build_id(dso, &bev.build_id);
+			if (head == &dsos__kernel && filename[0] == '[')
+				dso->kernel = 1;
+		}
 
 		offset += bev.header.size;
 	}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 8e6627e6b778..381999dd5c1f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -161,7 +161,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
 		       self->start, self->end, self->name);
 }
 
-static void dso__set_long_name(struct dso *self, char *name)
+void dso__set_long_name(struct dso *self, char *name)
 {
 	if (name == NULL)
 		return;
@@ -176,7 +176,7 @@ static void dso__set_basename(struct dso *self)
 
 struct dso *dso__new(const char *name)
 {
-	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+	struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1);
 
 	if (self != NULL) {
 		int i;
@@ -500,13 +500,17 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 
 			*module++ = '\0';
 
-			if (strcmp(self->name, module)) {
+			if (strcmp(curr_map->dso->short_name, module)) {
 				curr_map = map_groups__find_by_name(&session->kmaps, map->type, module);
 				if (curr_map == NULL) {
 					pr_debug("/proc/{kallsyms,modules} "
-					         "inconsistency!\n");
+					         "inconsistency while looking "
+						 "for \"%s\" module!\n", module);
 					return -1;
 				}
+
+				if (curr_map->dso->loaded)
+					goto discard_symbol;
 			}
 			/*
 			 * So that we look just like we get from .ko files,
@@ -1343,13 +1347,33 @@ struct map *map_groups__find_by_name(struct map_groups *self,
 	for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
 		struct map *map = rb_entry(nd, struct map, rb_node);
 
-		if (map->dso && strcmp(map->dso->name, name) == 0)
+		if (map->dso && strcmp(map->dso->short_name, name) == 0)
 			return map;
 	}
 
 	return NULL;
 }
 
+static int dso__kernel_module_get_build_id(struct dso *self)
+{
+	char filename[PATH_MAX];
+	/*
+	 * kernel module short names are of the form "[module]" and
+	 * we need just "module" here.
+	 */
+	const char *name = self->short_name + 1;
+
+	snprintf(filename, sizeof(filename),
+		 "/sys/module/%.*s/notes/.note.gnu.build-id",
+		 (int)strlen(name - 1), name);
+
+	if (sysfs__read_build_id(filename, self->build_id,
+				 sizeof(self->build_id)) == 0)
+		self->has_build_id = true;
+
+	return 0;
+}
+
 static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname)
 {
 	struct dirent *dent;
@@ -1395,6 +1419,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
 			if (long_name == NULL)
 				goto failure;
 			dso__set_long_name(map->dso, long_name);
+			dso__kernel_module_get_build_id(map->dso);
 		}
 	}
 
@@ -1437,6 +1462,24 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return self;
 }
 
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+					 const char *filename)
+{
+	struct map *map;
+	struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
+
+	if (dso == NULL)
+		return NULL;
+
+	map = map__new2(start, dso, MAP__FUNCTION);
+	if (map == NULL)
+		return NULL;
+
+	dso->origin = DSO__ORIG_KMODULE;
+	map_groups__insert(&self->kmaps, map);
+	return map;
+}
+
 static int perf_session__create_module_maps(struct perf_session *self)
 {
 	char *line = NULL;
@@ -1450,7 +1493,6 @@ static int perf_session__create_module_maps(struct perf_session *self)
 	while (!feof(file)) {
 		char name[PATH_MAX];
 		u64 start;
-		struct dso *dso;
 		char *sep;
 		int line_len;
 
@@ -1476,26 +1518,10 @@ static int perf_session__create_module_maps(struct perf_session *self)
 		*sep = '\0';
 
 		snprintf(name, sizeof(name), "[%s]", line);
-		dso = dso__new(name);
-
-		if (dso == NULL)
+		map = perf_session__new_module_map(self, start, name);
+		if (map == NULL)
 			goto out_delete_line;
-
-		map = map__new2(start, dso, MAP__FUNCTION);
-		if (map == NULL) {
-			dso__delete(dso);
-			goto out_delete_line;
-		}
-
-		snprintf(name, sizeof(name),
-			 "/sys/module/%s/notes/.note.gnu.build-id", line);
-		if (sysfs__read_build_id(name, dso->build_id,
-					 sizeof(dso->build_id)) == 0)
-			dso->has_build_id = true;
-
-		dso->origin = DSO__ORIG_KMODULE;
-		map_groups__insert(&self->kmaps, map);
-		dsos__add(&dsos__kernel, dso);
+		dso__kernel_module_get_build_id(map->dso);
 	}
 
 	free(line);
@@ -1573,10 +1599,28 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 		}
 	}
 
+	/*
+	 * Say the kernel DSO was created when processing the build-id header table,
+	 * we have a build-id, so check if it is the same as the running kernel,
+	 * using it if it is.
+	 */
+	if (self->has_build_id) {
+		u8 kallsyms_build_id[BUILD_ID_SIZE];
+
+		if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
+					 sizeof(kallsyms_build_id)) == 0)
+
+		is_kallsyms = dso__build_id_equal(self, kallsyms_build_id);
+		if (is_kallsyms)
+			goto do_kallsyms;
+		goto do_vmlinux;
+	}
+
 	is_kallsyms = self->long_name[0] == '[';
 	if (is_kallsyms)
 		goto do_kallsyms;
 
+do_vmlinux:
 	err = dso__load_vmlinux(self, map, session, self->long_name, filter);
 	if (err <= 0) {
 		pr_info("The file %s cannot be used, "
@@ -1694,16 +1738,12 @@ out_delete_kernel_dso:
 	return NULL;
 }
 
-static int map_groups__create_kernel_maps(struct map_groups *self,
-					  struct map *vmlinux_maps[MAP__NR_TYPES],
-					  const char *vmlinux)
+int __map_groups__create_kernel_maps(struct map_groups *self,
+				     struct map *vmlinux_maps[MAP__NR_TYPES],
+				     struct dso *kernel)
 {
-	struct dso *kernel = dsos__create_kernel(vmlinux);
 	enum map_type type;
 
-	if (kernel == NULL)
-		return -1;
-
 	for (type = 0; type < MAP__NR_TYPES; ++type) {
 		vmlinux_maps[type] = map__new2(0, kernel, type);
 		if (vmlinux_maps[type] == NULL)
@@ -1717,6 +1757,18 @@ static int map_groups__create_kernel_maps(struct map_groups *self,
 	return 0;
 }
 
+static int map_groups__create_kernel_maps(struct map_groups *self,
+					  struct map *vmlinux_maps[MAP__NR_TYPES],
+					  const char *vmlinux)
+{
+	struct dso *kernel = dsos__create_kernel(vmlinux);
+
+	if (kernel == NULL)
+		return -1;
+
+	return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel);
+}
+
 static void vmlinux_path__exit(void)
 {
 	while (--vmlinux_path__nr_entries >= 0) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ee0b4593db7b..594156e43b10 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -134,6 +134,7 @@ size_t dsos__fprintf_buildid(FILE *fp);
 size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
 size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
 char dso__symtab_origin(const struct dso *self);
+void dso__set_long_name(struct dso *self, char *name);
 void dso__set_build_id(struct dso *self, void *build_id);
 struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
@@ -151,5 +152,7 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
 int perf_session__create_kernel_maps(struct perf_session *self);
 
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+					 const char *filename);
 extern struct dso *vdso;
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index c206f72c8881..c06c13535a70 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -67,4 +67,8 @@ map_groups__find_function(struct map_groups *self, struct perf_session *session,
 
 struct map *map_groups__find_by_name(struct map_groups *self,
 				     enum map_type type, const char *name);
+
+int __map_groups__create_kernel_maps(struct map_groups *self,
+				     struct map *vmlinux_maps[MAP__NR_TYPES],
+				     struct dso *kernel);
 #endif	/* __PERF_THREAD_H */

From 0895cf0a823e03ea6d79736611e90186006c805e Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Date: Wed, 13 Jan 2010 13:22:18 -0200
Subject: [PATCH 058/640] perf: Fix few typos + cosmetics

Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1263396139-4798-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf.txt | 2 +-
 tools/perf/design.txt             | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 69c832557199..0eeb247dc7d2 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-Performance counters for Linux are are a new kernel-based subsystem
+Performance counters for Linux are a new kernel-based subsystem
 that provide a framework for all things performance analysis. It
 covers hardware level (CPU/PMU, Performance Monitoring Unit) features
 and software features (software counters, tracepoints) as well.
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 8d0de5130db3..bd0bb1b1279b 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -101,10 +101,10 @@ enum hw_event_ids {
 	 */
 	PERF_COUNT_HW_CPU_CYCLES		= 0,
 	PERF_COUNT_HW_INSTRUCTIONS		= 1,
-	PERF_COUNT_HW_CACHE_REFERENCES	= 2,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
 	PERF_COUNT_HW_CACHE_MISSES		= 3,
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_HW_BRANCH_MISSES	= 5,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
 };
 
@@ -131,8 +131,8 @@ software events, selected by 'event_id':
  */
 enum sw_event_ids {
 	PERF_COUNT_SW_CPU_CLOCK		= 0,
-	PERF_COUNT_SW_TASK_CLOCK		= 1,
-	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_TASK_CLOCK	= 1,
+	PERF_COUNT_SW_PAGE_FAULTS	= 2,
 	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
 	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,

From 66aeb6d5cb701aedd508187e08612bfd1e108e2e Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Date: Wed, 13 Jan 2010 13:22:19 -0200
Subject: [PATCH 059/640] perf top: Fix code typo in prompt_symbol()

sym_filter is what was (if ever) passed with -s option. What was
typed by user, and what we were looking for, is in buf.

Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1263396139-4798-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6822b44ca4f9..7a8a77ec2c9d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -667,7 +667,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
 	}
 
 	if (!found) {
-		fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
+		fprintf(stderr, "Sorry, %s is not active.\n", buf);
 		sleep(1);
 		return;
 	} else

From 0d755034dbd01e240eadf2d31f4f75d3088ccd21 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 12:23:09 -0200
Subject: [PATCH 060/640] perf tools: Don't cast RIP to pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since they can come from another architecture with bigger
pointers, i.e. processing a 64-bit perf.data on a 32-bit arch.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263478990-8200-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c |  4 ++--
 tools/perf/builtin-diff.c     |  4 ++--
 tools/perf/builtin-kmem.c     |  7 ++-----
 tools/perf/builtin-report.c   |  7 ++-----
 tools/perf/builtin-sched.c    |  7 ++-----
 tools/perf/builtin-trace.c    |  7 ++-----
 tools/perf/util/event.c       |  9 +++------
 tools/perf/util/session.c     | 16 ++++++----------
 8 files changed, 21 insertions(+), 40 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 117bbae844bf..73c202ee0882 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -132,8 +132,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 {
 	struct addr_location al;
 
-	dump_printf("(IP, %d): %d: %p\n", event->header.misc,
-		    event->ip.pid, (void *)(long)event->ip.ip);
+	dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+		    event->ip.pid, event->ip.ip);
 
 	if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 924bfb77a6ab..18b3f505f9db 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -42,8 +42,8 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
 	struct addr_location al;
 	struct sample_data data = { .period = 1, };
 
-	dump_printf("(IP, %d): %d: %p\n", event->header.misc,
-		    event->ip.pid, (void *)(long)event->ip.ip);
+	dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+		    event->ip.pid, event->ip.ip);
 
 	if (event__preprocess_sample(event, session, &al, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4af7199c5af7..7323d9dfbce8 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -316,11 +316,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
 	event__parse_sample(event, session->sample_type, &data);
 
-	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
-		event->header.misc,
-		data.pid, data.tid,
-		(void *)(long)data.ip,
-		(long long)data.period);
+	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+		    data.pid, data.tid, data.ip, data.period);
 
 	thread = perf_session__findnew(session, event->ip.pid);
 	if (thread == NULL) {
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 80d691a4191f..4c3d6997995b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -93,11 +93,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
 	event__parse_sample(event, session->sample_type, &data);
 
-	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
-		event->header.misc,
-		data.pid, data.tid,
-		(void *)(long)data.ip,
-		(long long)data.period);
+	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+		    data.pid, data.tid, data.ip, data.period);
 
 	if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
 		unsigned int i;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 702322f8fec1..4f5a03e43444 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1621,11 +1621,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
 	event__parse_sample(event, session->sample_type, &data);
 
-	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
-		event->header.misc,
-		data.pid, data.tid,
-		(void *)(long)data.ip,
-		(long long)data.period);
+	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+		    data.pid, data.tid, data.ip, data.period);
 
 	thread = perf_session__findnew(session, data.pid);
 	if (thread == NULL) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1831434aa938..8e9cbfe608d6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -75,11 +75,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
 	event__parse_sample(event, session->sample_type, &data);
 
-	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
-		event->header.misc,
-		data.pid, data.tid,
-		(void *)(long)data.ip,
-		(long long)data.period);
+	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+		    data.pid, data.tid, data.ip, data.period);
 
 	thread = perf_session__findnew(session, event->ip.pid);
 	if (thread == NULL) {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 4f3e7ef33b83..24ec5be4a1c0 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -313,12 +313,9 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 	struct thread *thread;
 	struct map *map;
 
-	dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
-		    self->mmap.pid, self->mmap.tid,
-		    (void *)(long)self->mmap.start,
-		    (void *)(long)self->mmap.len,
-		    (void *)(long)self->mmap.pgoff,
-		    self->mmap.filename);
+	dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
+		    self->mmap.pid, self->mmap.tid, self->mmap.start,
+		    self->mmap.len, self->mmap.pgoff, self->mmap.filename);
 
 	if (self->mmap.pid == 0) {
 		static const char kmmap_prefix[] = "[kernel.kallsyms.";
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fd1c5a39a5bb..e3ccdb46d6c4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -209,9 +209,8 @@ static int perf_session__process_event(struct perf_session *self,
 	trace_event(event);
 
 	if (event->header.type < PERF_RECORD_MAX) {
-		dump_printf("%p [%p]: PERF_RECORD_%s",
-			    (void *)(offset + head),
-			    (void *)(long)(event->header.size),
+		dump_printf("%#lx [%#x]: PERF_RECORD_%s",
+			    offset + head, event->header.size,
 			    event__name[event->header.type]);
 		++event__total[0];
 		++event__total[event->header.type];
@@ -362,16 +361,13 @@ more:
 
 	size = event->header.size;
 
-	dump_printf("\n%p [%p]: event: %d\n",
-			(void *)(offset + head),
-			(void *)(long)event->header.size,
-			event->header.type);
+	dump_printf("\n%#lx [%#x]: event: %d\n",
+		    offset + head, event->header.size, event->header.type);
 
 	if (size == 0 ||
 	    perf_session__process_event(self, event, ops, offset, head) < 0) {
-		dump_printf("%p [%p]: skipping unknown header type: %d\n",
-			    (void *)(offset + head),
-			    (void *)(long)(event->header.size),
+		dump_printf("%#lx [%#x]: skipping unknown header type: %d\n",
+			    offset + head, event->header.size,
 			    event->header.type);
 		/*
 		 * assume we lost track of the stream, check alignment, and

From ba21594cddee0a3af582971656702b1c4509d8f5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 12:23:10 -0200
Subject: [PATCH 061/640] perf tools: Cross platform perf.data analysis support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are still some problems related to loading vmlinux files,
but those are unrelated to the feature implemented in this
patch, so will get fixed in the next patches, but here are some
results:

1. collect perf.data file on a Fedora 12 machine, x86_64, 64-bit
userland

2. transfer it to a Debian Testing machine, PARISC64, 32-bit
userland

  acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | head -5
  74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms]
  55fdd56670453ea66c011158c4b9d30179c1d049 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/ipt_MASQUERADE.ko
  41adff63c730890480980d5d8ba513f1c216a858 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/iptable_nat.ko
  90a33def1077bb8e97b8a78546dc96c2de62df46 /lib/modules/2.6.33-rc4-tip+/kernel/net/ipv4/netfilter/nf_nat.ko
  984c7bea90ce1376d5c8e7ef43a781801286e62d /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/tun.ko

  acme@parisc:~/git/linux-2.6-tip$ perf buildid-list | tail -5
  22492f3753c6a67de5c7ccbd6b863390c92c0723 /usr/lib64/libXt.so.6.0.0
  353802bb7e1b895ba43507cc678f951e778e4c6f /usr/lib64/libMagickCore.so.2.0.0
  d10c2897558595efe7be8b0584cf7e6398bc776c /usr/lib64/libfprint.so.0.0.0
  a83ecfb519a788774a84d5ddde633c9ba56c03ab /home/acme/bin/perf
  d3ca765a8ecf257d263801d7ad8c49c189082317 /usr/lib64/libdwarf.so.0.0
  acme@parisc:~/git/linux-2.6-tip$

  acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm
  The file [kernel.kallsyms] cannot be used, trying to use /proc/kallsyms...

  ^^^^ The problem related to vmlinux handling, it shouldn't be trying this
  ^^^^ rather alien /proc/kallsyms at all...

  /lib64/libpthread-2.10.2.so with build id 5c68f7afeb33309c78037e374b0deee84dd441f6 not found, continuing without symbols
  /lib64/libc-2.10.2.so with build id eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 not found, continuing without symbols
  /home/acme/bin/perf with build id a83ecfb519a788774a84d5ddde633c9ba56c03ab not found, continuing without symbols
  /usr/sbin/openvpn with build id f2037a091ef36b591187a858d75e203690ea9409 not found, continuing without symbols
  Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko, continuing without symbols
  Failed to open /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwlcore.ko, continuing without symbols

  <SNIP more complaints about not finding the right build-ids,
        those will have to wait for 'perf archive' or plain
        copying what was collected by 'perf record' on the x86_64,
        source machine, see further below for an example of this >

  # Samples: 293085637
  #
  # Overhead          Command
  # ........  ...............
  #
      61.70%             find
      23.50%             perf
       5.86%          swapper
       3.12%             sshd
       2.39%             init
       0.87%             bash
       0.86%            sleep
       0.59%      dbus-daemon
       0.25%             hald
       0.24%   NetworkManager
       0.19%  hald-addon-rfki
       0.15%          openvpn
       0.07%             phy0
       0.07%         events/0
       0.05%          iwl3945
       0.05%         events/1
       0.03%      kondemand/0
  acme@parisc:~/git/linux-2.6-tip$

Which matches what we get when running the same command for the
same perf.data file on the F12, x86_64, source machine:

  [root@doppio linux-2.6-tip]# perf report --sort comm
  # Samples: 293085637
  #
  # Overhead          Command
  # ........  ...............
  #
      61.70%             find
      23.50%             perf
       5.86%          swapper
       3.12%             sshd
       2.39%             init
       0.87%             bash
       0.86%            sleep
       0.59%      dbus-daemon
       0.25%             hald
       0.24%   NetworkManager
       0.19%  hald-addon-rfki
       0.15%          openvpn
       0.07%             phy0
       0.07%         events/0
       0.05%          iwl3945
       0.05%         events/1
       0.03%      kondemand/0
  [root@doppio linux-2.6-tip]#

The other modes work as well, modulo the problem with vmlinux:

  acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso 2> /dev/null | head -15
  # Samples: 293085637
  #
  # Overhead          Command                      Shared Object
  # ........  ...............  .................................
  #
      35.11%             find                   ffffffff81002b5a
      18.25%             perf                   ffffffff8102235f
      16.17%             find  libc-2.10.2.so
       9.07%             find  find
       5.80%          swapper                   ffffffff8102235f
       3.95%             perf  libc-2.10.2.so
       2.33%             init                   ffffffff810091b9
       1.65%             sshd  libcrypto.so.0.9.8k
       1.35%             find  [e1000e]
       0.68%            sleep  libc-2.10.2.so
  acme@parisc:~/git/linux-2.6-tip$

And the lack of the right buildids:

  acme@parisc:~/git/linux-2.6-tip$ perf report --sort comm,dso,symbol 2> /dev/null | head -15
  # Samples: 293085637
  #
  # Overhead          Command                      Shared Object  Symbol
  # ........  ...............  .................................  ......
  #
      35.11%             find                   ffffffff81002b5a  [k] 0xffffffff81002b5a
      18.25%             perf                   ffffffff8102235f  [k] 0xffffffff8102235f
      16.17%             find  libc-2.10.2.so                     [.] 0x00000000045782
       9.07%             find  find                               [.] 0x0000000000fb0e
       5.80%          swapper                   ffffffff8102235f  [k] 0xffffffff8102235f
       3.95%             perf  libc-2.10.2.so                     [.] 0x0000000007f398
       2.33%             init                   ffffffff810091b9  [k] 0xffffffff810091b9
       1.65%             sshd  libcrypto.so.0.9.8k                [.] 0x00000000105440
       1.35%             find  [e1000e]                           [k] 0x00000000010948
       0.68%            sleep  libc-2.10.2.so                     [.] 0x0000000011ad5b
  acme@parisc:~/git/linux-2.6-tip$

But if we:

  acme@parisc:~/git/linux-2.6-tip$ ls ~/.debug
  ls: cannot access /home/acme/.debug: No such file or directory
  acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/lib64/libc-2.10.2.so/
  acme@parisc:~/git/linux-2.6-tip$ scp doppio:.debug/lib64/libc-2.10.2.so/* ~/.debug/lib64/libc-2.10.2.so/
  acme@doppio's password:
  eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1	             100% 1783KB 714.7KB/s   00:02
  acme@parisc:~/git/linux-2.6-tip$ mkdir -p ~/.debug/.build-id/eb
  acme@parisc:~/git/linux-2.6-tip$ ln -s ../../lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 ~/.debug/.build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1
  acme@parisc:~/git/linux-2.6-tip$ perf report --dsos libc-2.10.2.so 2> /dev/null
  # dso: libc-2.10.2.so
  # Samples: 64281170
  #
  # Overhead          Command  Symbol
  # ........  ...............  ......
  #
      14.98%             perf  [.] __GI_strcmp
      12.30%             find  [.] __GI_memmove
       9.25%             find  [.] _int_malloc
       7.60%             find  [.] _IO_vfprintf_internal
       6.10%             find  [.] _IO_new_file_xsputn
       6.02%             find  [.] __GI_close
       3.08%             find  [.] _IO_file_overflow_internal
       3.08%             find  [.] malloc_consolidate
       3.08%             find  [.] _int_free
       3.08%             find  [.] __strchrnul
       3.08%             find  [.] __getdents64
       3.08%             find  [.] __write_nocancel
       3.08%            sleep  [.] __GI__dl_addr
       3.08%             sshd  [.] __libc_select
       3.08%             find  [.] _IO_new_file_write
       3.07%             find  [.] _IO_new_do_write
       3.06%             find  [.] __GI___errno_location
       3.05%             find  [.] __GI___libc_malloc
       3.04%             perf  [.] __GI_memcpy
       1.71%             find  [.] __fprintf_chk
       1.29%             bash  [.] __gconv_transform_utf8_internal
       0.79%      dbus-daemon  [.] __GI_strlen
  #
  # (For a higher level overview, try: perf report --sort comm,dso)
  #
  acme@parisc:~/git/linux-2.6-tip$

Which matches what we get on the source, F12, x86_64 machine:

  [root@doppio linux-2.6-tip]# perf report --dsos libc-2.10.2.so
  # dso: libc-2.10.2.so
  # Samples: 64281170
  #
  # Overhead          Command  Symbol
  # ........  ...............  ......
  #
      14.98%             perf  [.] __GI_strcmp
      12.30%             find  [.] __GI_memmove
       9.25%             find  [.] _int_malloc
       7.60%             find  [.] _IO_vfprintf_internal
       6.10%             find  [.] _IO_new_file_xsputn
       6.02%             find  [.] __GI_close
       3.08%             find  [.] _IO_file_overflow_internal
       3.08%             find  [.] malloc_consolidate
       3.08%             find  [.] _int_free
       3.08%             find  [.] __strchrnul
       3.08%             find  [.] __getdents64
       3.08%             find  [.] __write_nocancel
       3.08%            sleep  [.] __GI__dl_addr
       3.08%             sshd  [.] __libc_select
       3.08%             find  [.] _IO_new_file_write
       3.07%             find  [.] _IO_new_do_write
       3.06%             find  [.] __GI___errno_location
       3.05%             find  [.] __GI___libc_malloc
       3.04%             perf  [.] __GI_memcpy
       1.71%             find  [.] __fprintf_chk
       1.29%             bash  [.] __gconv_transform_utf8_internal
       0.79%      dbus-daemon  [.] __GI_strlen
  #
  # (For a higher level overview, try: perf report --sort comm,dso)
  #
  [root@doppio linux-2.6-tip]#

So I think this is really, really nice in that it demonstrates
the portability of perf.data files and the use of build-ids
accross such aliens worlds :-)

There are some things to fix tho, like the bitmap on the header,
but things are looking good.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263478990-8200-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/header.c  |  63 +++++++++++++++++-----
 tools/perf/util/header.h  |   2 +
 tools/perf/util/session.c | 108 ++++++++++++++++++++++++++++++++++----
 tools/perf/util/session.h |   7 ++-
 4 files changed, 157 insertions(+), 23 deletions(-)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ec96321eb9e4..b31e0ae4b8db 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,8 +1,10 @@
 #include <sys/types.h>
+#include <byteswap.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <linux/list.h>
+#include <linux/kernel.h>
 
 #include "util.h"
 #include "header.h"
@@ -464,8 +466,21 @@ static int do_read(int fd, void *buf, size_t size)
 	return 0;
 }
 
+static int perf_header__getbuffer64(struct perf_header *self,
+				    int fd, void *buf, size_t size)
+{
+	if (do_read(fd, buf, size))
+		return -1;
+
+	if (self->needs_swap)
+		mem_bswap_64(buf, size);
+
+	return 0;
+}
+
 int perf_header__process_sections(struct perf_header *self, int fd,
 				  int (*process)(struct perf_file_section *self,
+						 struct perf_header *ph,
 						 int feat, int fd))
 {
 	struct perf_file_section *feat_sec;
@@ -486,7 +501,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 
 	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
-	if (do_read(fd, feat_sec, sec_size))
+	if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
 		goto out_free;
 
 	err = 0;
@@ -494,7 +509,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 		if (perf_header__has_feat(self, feat)) {
 			struct perf_file_section *sec = &feat_sec[idx++];
 
-			err = process(sec, feat, fd);
+			err = process(sec, self, feat, fd);
 			if (err < 0)
 				break;
 		}
@@ -511,10 +526,20 @@ int perf_file_header__read(struct perf_file_header *self,
 	lseek(fd, 0, SEEK_SET);
 
 	if (do_read(fd, self, sizeof(*self)) ||
-	    self->magic     != PERF_MAGIC ||
-	    self->attr_size != sizeof(struct perf_file_attr))
+	    memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
 		return -1;
 
+	if (self->attr_size != sizeof(struct perf_file_attr)) {
+		u64 attr_size = bswap_64(self->attr_size);
+
+		if (attr_size != sizeof(struct perf_file_attr))
+			return -1;
+
+		mem_bswap_64(self, offsetof(struct perf_file_header,
+					    adds_features));
+		ph->needs_swap = true;
+	}
+
 	if (self->size != sizeof(*self)) {
 		/* Support the previous format */
 		if (self->size == offsetof(typeof(*self), adds_features))
@@ -524,16 +549,28 @@ int perf_file_header__read(struct perf_file_header *self,
 	}
 
 	memcpy(&ph->adds_features, &self->adds_features,
-	       sizeof(self->adds_features));
+	       sizeof(ph->adds_features));
+	/*
+	 * FIXME: hack that assumes that if we need swap the perf.data file
+	 * may be coming from an arch with a different word-size, ergo different
+	 * DEFINE_BITMAP format, investigate more later, but for now its mostly
+	 * safe to assume that we have a build-id section. Trace files probably
+	 * have several other issues in this realm anyway...
+	 */
+	if (ph->needs_swap) {
+		memset(&ph->adds_features, 0, sizeof(ph->adds_features));
+		perf_header__set_feat(ph, HEADER_BUILD_ID);
+	}
 
 	ph->event_offset = self->event_types.offset;
-	ph->event_size	 = self->event_types.size;
-	ph->data_offset	 = self->data.offset;
+	ph->event_size   = self->event_types.size;
+	ph->data_offset  = self->data.offset;
 	ph->data_size	 = self->data.size;
 	return 0;
 }
 
 static int perf_file_section__process(struct perf_file_section *self,
+				      struct perf_header *ph,
 				      int feat, int fd)
 {
 	if (lseek(fd, self->offset, SEEK_SET) < 0) {
@@ -548,7 +585,7 @@ static int perf_file_section__process(struct perf_file_section *self,
 		break;
 
 	case HEADER_BUILD_ID:
-		if (perf_header__read_build_ids(fd, self->offset, self->size))
+		if (perf_header__read_build_ids(ph, fd, self->offset, self->size))
 			pr_debug("Failed to read buildids, continuing...\n");
 		break;
 	default:
@@ -560,7 +597,7 @@ static int perf_file_section__process(struct perf_file_section *self,
 
 int perf_header__read(struct perf_header *self, int fd)
 {
-	struct perf_file_header f_header;
+	struct perf_file_header	f_header;
 	struct perf_file_attr	f_attr;
 	u64			f_id;
 	int nr_attrs, nr_ids, i, j;
@@ -577,8 +614,9 @@ int perf_header__read(struct perf_header *self, int fd)
 		struct perf_header_attr *attr;
 		off_t tmp;
 
-		if (do_read(fd, &f_attr, sizeof(f_attr)))
+		if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr)))
 			goto out_errno;
+
 		tmp = lseek(fd, 0, SEEK_CUR);
 
 		attr = perf_header_attr__new(&f_attr.attr);
@@ -589,7 +627,7 @@ int perf_header__read(struct perf_header *self, int fd)
 		lseek(fd, f_attr.ids.offset, SEEK_SET);
 
 		for (j = 0; j < nr_ids; j++) {
-			if (do_read(fd, &f_id, sizeof(f_id)))
+			if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id)))
 				goto out_errno;
 
 			if (perf_header_attr__add_id(attr, f_id) < 0) {
@@ -610,7 +648,8 @@ int perf_header__read(struct perf_header *self, int fd)
 		events = malloc(f_header.event_types.size);
 		if (events == NULL)
 			return -ENOMEM;
-		if (do_read(fd, events, f_header.event_types.size))
+		if (perf_header__getbuffer64(self, fd, events,
+					     f_header.event_types.size))
 			goto out_errno;
 		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
 	}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 2b69aab67e35..ccc8540feccd 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -52,6 +52,7 @@ struct perf_header {
 	u64			data_size;
 	u64			event_offset;
 	u64			event_size;
+	bool			needs_swap;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 };
 
@@ -80,6 +81,7 @@ bool perf_header__has_feat(const struct perf_header *self, int feat);
 
 int perf_header__process_sections(struct perf_header *self, int fd,
 				  int (*process)(struct perf_file_section *self,
+						 struct perf_header *ph,
 						 int feat, int fd));
 
 #endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e3ccdb46d6c4..604e14f6a6f9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,6 @@
 #include <linux/kernel.h>
 
+#include <byteswap.h>
 #include <unistd.h>
 #include <sys/types.h>
 
@@ -201,21 +202,88 @@ void event__print_totals(void)
 			event__name[i], event__total[i]);
 }
 
+void mem_bswap_64(void *src, int byte_size)
+{
+	u64 *m = src;
+
+	while (byte_size > 0) {
+		*m = bswap_64(*m);
+		byte_size -= sizeof(u64);
+		++m;
+	}
+}
+
+static void event__all64_swap(event_t *self)
+{
+	struct perf_event_header *hdr = &self->header;
+	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
+}
+
+static void event__comm_swap(event_t *self)
+{
+	self->comm.pid = bswap_32(self->comm.pid);
+	self->comm.tid = bswap_32(self->comm.tid);
+}
+
+static void event__mmap_swap(event_t *self)
+{
+	self->mmap.pid	 = bswap_32(self->mmap.pid);
+	self->mmap.tid	 = bswap_32(self->mmap.tid);
+	self->mmap.start = bswap_64(self->mmap.start);
+	self->mmap.len	 = bswap_64(self->mmap.len);
+	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
+}
+
+static void event__task_swap(event_t *self)
+{
+	self->fork.pid	= bswap_32(self->fork.pid);
+	self->fork.tid	= bswap_32(self->fork.tid);
+	self->fork.ppid	= bswap_32(self->fork.ppid);
+	self->fork.ptid	= bswap_32(self->fork.ptid);
+	self->fork.time	= bswap_64(self->fork.time);
+}
+
+static void event__read_swap(event_t *self)
+{
+	self->read.pid		= bswap_32(self->read.pid);
+	self->read.tid		= bswap_32(self->read.tid);
+	self->read.value	= bswap_64(self->read.value);
+	self->read.time_enabled	= bswap_64(self->read.time_enabled);
+	self->read.time_running	= bswap_64(self->read.time_running);
+	self->read.id		= bswap_64(self->read.id);
+}
+
+typedef void (*event__swap_op)(event_t *self);
+
+static event__swap_op event__swap_ops[] = {
+	[PERF_RECORD_MMAP]   = event__mmap_swap,
+	[PERF_RECORD_COMM]   = event__comm_swap,
+	[PERF_RECORD_FORK]   = event__task_swap,
+	[PERF_RECORD_EXIT]   = event__task_swap,
+	[PERF_RECORD_LOST]   = event__all64_swap,
+	[PERF_RECORD_READ]   = event__read_swap,
+	[PERF_RECORD_SAMPLE] = event__all64_swap,
+	[PERF_RECORD_MAX]    = NULL,
+};
+
 static int perf_session__process_event(struct perf_session *self,
 				       event_t *event,
 				       struct perf_event_ops *ops,
-				       unsigned long offset, unsigned long head)
+				       u64 offset, u64 head)
 {
 	trace_event(event);
 
 	if (event->header.type < PERF_RECORD_MAX) {
-		dump_printf("%#lx [%#x]: PERF_RECORD_%s",
+		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
 			    offset + head, event->header.size,
 			    event__name[event->header.type]);
 		++event__total[0];
 		++event__total[event->header.type];
 	}
 
+	if (self->header.needs_swap && event__swap_ops[event->header.type])
+		event__swap_ops[event->header.type](event);
+
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
 		return ops->sample(event, self);
@@ -241,7 +309,15 @@ static int perf_session__process_event(struct perf_session *self,
 	}
 }
 
-int perf_header__read_build_ids(int input, u64 offset, u64 size)
+void perf_event_header__bswap(struct perf_event_header *self)
+{
+	self->type = bswap_32(self->type);
+	self->misc = bswap_16(self->misc);
+	self->size = bswap_16(self->size);
+}
+
+int perf_header__read_build_ids(struct perf_header *self,
+				int input, u64 offset, u64 size)
 {
 	struct build_id_event bev;
 	char filename[PATH_MAX];
@@ -256,6 +332,9 @@ int perf_header__read_build_ids(int input, u64 offset, u64 size)
 		if (read(input, &bev, sizeof(bev)) != sizeof(bev))
 			goto out;
 
+		if (self->needs_swap)
+			perf_event_header__bswap(&bev.header);
+
 		len = bev.header.size - sizeof(bev);
 		if (read(input, filename, len) != len)
 			goto out;
@@ -292,9 +371,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_event_ops *ops)
 {
-	int err;
-	unsigned long head, shift;
-	unsigned long offset = 0;
+	int err, mmap_prot, mmap_flags;
+	u64 head, shift;
+	u64 offset = 0;
 	size_t	page_size;
 	event_t *event;
 	uint32_t size;
@@ -330,9 +409,16 @@ out_getcwd_err:
 	offset += shift;
 	head -= shift;
 
+	mmap_prot  = PROT_READ;
+	mmap_flags = MAP_SHARED;
+
+	if (self->header.needs_swap) {
+		mmap_prot  |= PROT_WRITE;
+		mmap_flags = MAP_PRIVATE;
+	}
 remap:
-	buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
-		   MAP_SHARED, self->fd, offset);
+	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
+		   mmap_flags, self->fd, offset);
 	if (buf == MAP_FAILED) {
 		pr_err("failed to mmap file\n");
 		err = -errno;
@@ -342,6 +428,8 @@ remap:
 more:
 	event = (event_t *)(buf + head);
 
+	if (self->header.needs_swap)
+		perf_event_header__bswap(&event->header);
 	size = event->header.size;
 	if (size == 0)
 		size = 8;
@@ -361,12 +449,12 @@ more:
 
 	size = event->header.size;
 
-	dump_printf("\n%#lx [%#x]: event: %d\n",
+	dump_printf("\n%#Lx [%#x]: event: %d\n",
 		    offset + head, event->header.size, event->header.type);
 
 	if (size == 0 ||
 	    perf_session__process_event(self, event, ops, offset, head) < 0) {
-		dump_printf("%#lx [%#x]: skipping unknown header type: %d\n",
+		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
 			    offset + head, event->header.size,
 			    event->header.type);
 		/*
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d4a9d20f8d44..36d1a80c0b6c 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -51,6 +51,8 @@ struct perf_event_ops {
 struct perf_session *perf_session__new(const char *filename, int mode, bool force);
 void perf_session__delete(struct perf_session *self);
 
+void perf_event_header__bswap(struct perf_event_header *self);
+
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_event_ops *event_ops);
 
@@ -61,7 +63,8 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
 
 bool perf_session__has_traces(struct perf_session *self, const char *msg);
 
-int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
+int perf_header__read_build_ids(struct perf_header *self, int input,
+				u64 offset, u64 file_size);
 
 int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
 					     const char *symbol_name,
@@ -69,4 +72,6 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
 void perf_session__reloc_vmlinux_maps(struct perf_session *self,
 				      u64 unrelocated_addr);
 
+void mem_bswap_64(void *src, int byte_size);
+
 #endif /* __PERF_SESSION_H */

From 1b75962e92d48a41019d4b440e221638aa2a7238 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 18:30:04 -0200
Subject: [PATCH 062/640] perf tools: Convert getpagesize() uses to
 sysconf(_SC_GETPAGESIZE)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the more portable and equivalent sysconf call.

Reported-by: Aristeu Rozanski <aris@redhat.com>
Reported-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Aristeu Rozanski <aris@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ulrich Drepper <drepper@redhat.com>
LKML-Reference: <1263501006-14185-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/session.c          | 2 +-
 tools/perf/util/trace-event-info.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 604e14f6a6f9..1951e330377c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -384,7 +384,7 @@ int perf_session__process_events(struct perf_session *self,
 
 	perf_event_ops__fill_defaults(ops);
 
-	page_size = getpagesize();
+	page_size = sysconf(_SC_PAGESIZE);
 
 	head = self->header.data_offset;
 
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 407fd65b6cdb..5ea8973ad331 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -515,7 +515,7 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
 	write_or_die(buf, 1);
 
 	/* save page_size */
-	page_size = getpagesize();
+	page_size = sysconf(_SC_PAGESIZE);
 	write_or_die(&page_size, 4);
 
 	read_header_files();

From 8d0591f6ad9edf66697ce29de176fb6f3213b9e3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 18:30:05 -0200
Subject: [PATCH 063/640] perf symbols: Don't try to load kallsyms if doesn't
 match the record build-id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now a perf.data file collected on a x86_64 fedora 12 machine
gets properly parsed on a Debian testing PARISC64 machine with
32-bit userland:

  acme@parisc:~/git/linux-2.6-tip$ perf report 2> /dev/null | head -15
  # Samples: 293085637
  #
  # Overhead          Command                      Shared Object  Symbol
  # ........  ...............  .................................  ......
  #
      35.11%             find  [kernel.kallsyms]                  [k] 0xffffffff81002b5a
      18.25%             perf  [kernel.kallsyms]                  [k] 0xffffffff8102235f
       9.07%             find  find                               [.] 0x0000000000fb0e
       5.80%          swapper  [kernel.kallsyms]                  [k] 0xffffffff8102235f
       3.29%             perf  libc-2.10.2.so                     [.] __GI_strcmp
       2.70%             find  libc-2.10.2.so                     [.] __GI_memmove
       2.33%             init  [kernel.kallsyms]                  [k] 0xffffffff810091b9
       2.03%             find  libc-2.10.2.so                     [.] _int_malloc
       1.67%             find  libc-2.10.2.so                     [.] _IO_vfprintf_internal
       1.65%             sshd  libcrypto.so.0.9.8k                [.] 0x00000000105440
  acme@parisc:~/git/linux-2.6-tip$

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263501006-14185-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 381999dd5c1f..71d23e1e30e8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1608,11 +1608,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 		u8 kallsyms_build_id[BUILD_ID_SIZE];
 
 		if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
-					 sizeof(kallsyms_build_id)) == 0)
-
-		is_kallsyms = dso__build_id_equal(self, kallsyms_build_id);
-		if (is_kallsyms)
-			goto do_kallsyms;
+					 sizeof(kallsyms_build_id)) == 0) {
+			is_kallsyms = dso__build_id_equal(self, kallsyms_build_id);
+			if (is_kallsyms)
+				goto do_kallsyms;
+		}
 		goto do_vmlinux;
 	}
 
@@ -1623,6 +1623,9 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 do_vmlinux:
 	err = dso__load_vmlinux(self, map, session, self->long_name, filter);
 	if (err <= 0) {
+		if (self->has_build_id)
+			return -1;
+
 		pr_info("The file %s cannot be used, "
 			"trying to use /proc/kallsyms...", self->long_name);
 do_kallsyms:

From 9e201442de7c954f03710ac76f28c1927d07550c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 18:30:06 -0200
Subject: [PATCH 064/640] perf symbols: Cache /proc/kallsyms files by build-id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So that when we don't have a vmlinux handy we can store the
kallsyms for later use by 'perf report'.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263501006-14185-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c  |  2 +-
 tools/perf/util/header.c | 15 +++++++++----
 tools/perf/util/symbol.c | 48 +++++++++++++++++++++++++++++-----------
 tools/perf/util/symbol.h |  5 +++--
 tools/perf/util/util.c   | 30 +++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 24ec5be4a1c0..0e9820ac4f5e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -245,7 +245,7 @@ int event__synthesize_kernel_mmap(event__handler_t process,
 	 */
 	struct process_symbol_args args = { .name = symbol_name, };
 
-	if (kallsyms__parse(&args, find_symbol_cb) <= 0)
+	if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0)
 		return -ENOENT;
 
 	size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b31e0ae4b8db..1b65fed0dd2d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -237,11 +237,13 @@ static int dso__cache_build_id(struct dso *self, const char *debugdir)
 	char *filename = malloc(size),
 	     *linkname = malloc(size), *targetname, *sbuild_id;
 	int len, err = -1;
+	bool is_kallsyms = self->kernel && self->long_name[0] != '/';
 
 	if (filename == NULL || linkname == NULL)
 		goto out_free;
 
-	len = snprintf(filename, size, "%s%s", debugdir, self->long_name);
+	len = snprintf(filename, size, "%s%s%s",
+		       debugdir, is_kallsyms ? "/" : "", self->long_name);
 	if (mkdir_p(filename, 0755))
 		goto out_free;
 
@@ -249,9 +251,14 @@ static int dso__cache_build_id(struct dso *self, const char *debugdir)
 	sbuild_id = filename + len;
 	build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
 
-	if (access(filename, F_OK) && link(self->long_name, filename) &&
-	    copyfile(self->long_name, filename))
-		goto out_free;
+	if (access(filename, F_OK)) {
+		if (is_kallsyms) {
+			 if (copyfile("/proc/kallsyms", filename))
+				goto out_free;
+		} else if (link(self->long_name, filename) &&
+			   copyfile(self->long_name, filename))
+			goto out_free;
+	}
 
 	len = snprintf(linkname, size, "%s/.build-id/%.2s",
 		       debugdir, sbuild_id);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 71d23e1e30e8..ae61e9f4d6eb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -383,13 +383,14 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 	return ret;
 }
 
-int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name,
+int kallsyms__parse(const char *filename, void *arg,
+		    int (*process_symbol)(void *arg, const char *name,
 						     char type, u64 start))
 {
 	char *line = NULL;
 	size_t n;
 	int err = 0;
-	FILE *file = fopen("/proc/kallsyms", "r");
+	FILE *file = fopen(filename, "r");
 
 	if (file == NULL)
 		goto out_failure;
@@ -466,10 +467,11 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
  * so that we can in the next step set the symbol ->end address and then
  * call kernel_maps__split_kallsyms.
  */
-static int dso__load_all_kallsyms(struct dso *self, struct map *map)
+static int dso__load_all_kallsyms(struct dso *self, const char *filename,
+				  struct map *map)
 {
 	struct process_kallsyms_args args = { .map = map, .dso = self, };
-	return kallsyms__parse(&args, map__process_kallsym_symbol);
+	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
 }
 
 /*
@@ -556,10 +558,10 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 }
 
 
-static int dso__load_kallsyms(struct dso *self, struct map *map,
+static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
 			      struct perf_session *session, symbol_filter_t filter)
 {
-	if (dso__load_all_kallsyms(self, map) < 0)
+	if (dso__load_all_kallsyms(self, filename, map) < 0)
 		return -1;
 
 	symbols__fixup_end(&self->symbols[map->type]);
@@ -1580,7 +1582,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 				struct perf_session *session, symbol_filter_t filter)
 {
 	int err;
-	bool is_kallsyms;
+	const char *kallsyms_filename = NULL;
+	char *kallsyms_allocated_filename = NULL;
 
 	if (vmlinux_path != NULL) {
 		int i;
@@ -1606,19 +1609,37 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	 */
 	if (self->has_build_id) {
 		u8 kallsyms_build_id[BUILD_ID_SIZE];
+		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
 		if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
 					 sizeof(kallsyms_build_id)) == 0) {
-			is_kallsyms = dso__build_id_equal(self, kallsyms_build_id);
-			if (is_kallsyms)
+			if (dso__build_id_equal(self, kallsyms_build_id)) {
+				kallsyms_filename = "/proc/kallsyms";
 				goto do_kallsyms;
+			}
 		}
+
+		build_id__sprintf(self->build_id, sizeof(self->build_id),
+				  sbuild_id);
+
+		if (asprintf(&kallsyms_allocated_filename,
+			     "%s/.debug/[kernel.kallsyms]/%s",
+			     getenv("HOME"), sbuild_id) != -1) {
+			if (access(kallsyms_filename, F_OK)) {
+				kallsyms_filename = kallsyms_allocated_filename;
+				goto do_kallsyms;
+			}
+			free(kallsyms_allocated_filename);
+			kallsyms_allocated_filename = NULL;
+		}
+
 		goto do_vmlinux;
 	}
 
-	is_kallsyms = self->long_name[0] == '[';
-	if (is_kallsyms)
+	if (self->long_name[0] == '[') {
+		kallsyms_filename = "/proc/kallsyms";
 		goto do_kallsyms;
+	}
 
 do_vmlinux:
 	err = dso__load_vmlinux(self, map, session, self->long_name, filter);
@@ -1629,9 +1650,10 @@ do_vmlinux:
 		pr_info("The file %s cannot be used, "
 			"trying to use /proc/kallsyms...", self->long_name);
 do_kallsyms:
-		err = dso__load_kallsyms(self, map, session, filter);
-		if (err > 0 && !is_kallsyms)
+		err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
+		if (err > 0 && kallsyms_filename == NULL)
                         dso__set_long_name(self, strdup("[kernel.kallsyms]"));
+		free(kallsyms_allocated_filename);
 	}
 
 	if (err > 0) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 594156e43b10..36b7c717f5ee 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -144,8 +144,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
 bool dsos__read_build_ids(void);
 int build_id__sprintf(u8 *self, int len, char *bf);
-int kallsyms__parse(void *arg, int (*process_symbol)(void *arg, const char *name,
-						     char type, u64 start));
+int kallsyms__parse(const char *filename, void *arg,
+		    int (*process_symbol)(void *arg, const char *name,
+					  char type, u64 start));
 
 int symbol__init(void);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index f3c0798a5e78..f0685849b244 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -32,6 +32,33 @@ int mkdir_p(char *path, mode_t mode)
 	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
 }
 
+static int slow_copyfile(const char *from, const char *to)
+{
+	int err = 0;
+	char *line = NULL;
+	size_t n;
+	FILE *from_fp = fopen(from, "r"), *to_fp;
+
+	if (from_fp == NULL)
+		goto out;
+
+	to_fp = fopen(to, "w");
+	if (to_fp == NULL)
+		goto out_fclose_from;
+
+	while (getline(&line, &n, from_fp) > 0)
+		if (fputs(line, to_fp) == EOF)
+			goto out_fclose_to;
+	err = 0;
+out_fclose_to:
+	fclose(to_fp);
+	free(line);
+out_fclose_from:
+	fclose(from_fp);
+out:
+	return err;
+}
+
 int copyfile(const char *from, const char *to)
 {
 	int fromfd, tofd;
@@ -42,6 +69,9 @@ int copyfile(const char *from, const char *to)
 	if (stat(from, &st))
 		goto out;
 
+	if (st.st_size == 0) /* /proc? do it slowly... */
+		return slow_copyfile(from, to);
+
 	fromfd = open(from, O_RDONLY);
 	if (fromfd < 0)
 		goto out;

From cf4e5b0838e822dd404638ad00d35b63fffe8191 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 23:45:27 -0200
Subject: [PATCH 065/640] perf symbols: Use dso->long_name in dsos__find()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If not we end up duplicating the module DSOs because first we
insert them using the short name found in /proc/modules, then,
when processing synthesized MMAP events we add them again.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263519930-22803-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ae61e9f4d6eb..4267138c7bbe 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1679,7 +1679,7 @@ static struct dso *dsos__find(struct list_head *head, const char *name)
 	struct dso *pos;
 
 	list_for_each_entry(pos, head, node)
-		if (strcmp(pos->name, name) == 0)
+		if (strcmp(pos->long_name, name) == 0)
 			return pos;
 	return NULL;
 }

From 18c3daa4961b9fa1f2db0711d93c0acf0c39fd12 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 23:45:28 -0200
Subject: [PATCH 066/640] perf record: Encode the domain while synthesizing
 MMAP events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the past 'perf record' had to process only userspace MMAP
events, the ones generated in the kernel, but after we reused
the MMAP events to encode the module mapings we ended up adding
them first to the list of userspace DSOs (dsos__user) and to the
kernel one (dsos__kernel).

Fix this by encoding the header.misc field and then using it,
like other parts to decide the right DSOs list to insert/find.

The gotcha here is that since the kernel puts zero in .misc,
which isn't PERF_RECORD_MISC_KERNEL (1 << 1), to differentiate,
we put 1 in .misc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263519930-22803-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c |  8 ++++++--
 tools/perf/util/event.c     | 11 +++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c130df2676f1..614fa9a4c67c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -117,8 +117,12 @@ static void write_event(event_t *buf, size_t size)
 	* Add it to the list of DSOs, so that when we finish this
 	 * record session we can pick the available build-ids.
 	 */
-	if (buf->header.type == PERF_RECORD_MMAP)
-		dsos__findnew(buf->mmap.filename);
+	if (buf->header.type == PERF_RECORD_MMAP) {
+		struct list_head *head = &dsos__user;
+		if (buf->mmap.header.misc == 1)
+			head = &dsos__kernel;
+		__dsos__findnew(head, buf->mmap.filename);
+	}
 
 	write_output(buf, size);
 }
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 0e9820ac4f5e..1abaefc126a8 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -110,7 +110,10 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
 	while (1) {
 		char bf[BUFSIZ], *pbf = bf;
 		event_t ev = {
-			.header = { .type = PERF_RECORD_MMAP },
+			.header = {
+				.type = PERF_RECORD_MMAP,
+				.misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */
+			 },
 		};
 		int n;
 		size_t size;
@@ -170,6 +173,7 @@ int event__synthesize_modules(event__handler_t process,
 
 		size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
 		memset(&ev, 0, sizeof(ev));
+		ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
 		ev.mmap.header.type = PERF_RECORD_MMAP;
 		ev.mmap.header.size = (sizeof(ev.mmap) -
 				        (sizeof(ev.mmap.filename) - size));
@@ -236,7 +240,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
 {
 	size_t size;
 	event_t ev = {
-		.header = { .type = PERF_RECORD_MMAP },
+		.header = {
+			.type = PERF_RECORD_MMAP,
+			.misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
+		},
 	};
 	/*
 	 * We should get this from /sys/kernel/sections/.text, but till that is

From 59ee68ecd1561a233fb6ad351980bea8402533e7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 23:45:29 -0200
Subject: [PATCH 067/640] perf symbols: Create thread__find_addr_map from
 thread__find_addr_location
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because some tools will only want to know with maps had hits,
not needing the full symbol resolution done by
thread__find_addr_location.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263519930-22803-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c  | 26 +++++++++++++++++---------
 tools/perf/util/thread.h |  5 +++++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 1abaefc126a8..5a6e827a09eb 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -422,11 +422,10 @@ int event__process_task(event_t *self, struct perf_session *session)
 	return 0;
 }
 
-void thread__find_addr_location(struct thread *self,
-				struct perf_session *session, u8 cpumode,
-				enum map_type type, u64 addr,
-				struct addr_location *al,
-				symbol_filter_t filter)
+void thread__find_addr_map(struct thread *self,
+			   struct perf_session *session, u8 cpumode,
+			   enum map_type type, u64 addr,
+			   struct addr_location *al)
 {
 	struct map_groups *mg = &self->mg;
 
@@ -441,7 +440,6 @@ void thread__find_addr_location(struct thread *self,
 	else {
 		al->level = 'H';
 		al->map = NULL;
-		al->sym = NULL;
 		return;
 	}
 try_again:
@@ -460,11 +458,21 @@ try_again:
 			mg = &session->kmaps;
 			goto try_again;
 		}
-		al->sym = NULL;
-	} else {
+	} else
 		al->addr = al->map->map_ip(al->map, al->addr);
+}
+
+void thread__find_addr_location(struct thread *self,
+				struct perf_session *session, u8 cpumode,
+				enum map_type type, u64 addr,
+				struct addr_location *al,
+				symbol_filter_t filter)
+{
+	thread__find_addr_map(self, session, cpumode, type, addr, al);
+	if (al->map != NULL)
 		al->sym = map__find_symbol(al->map, session, al->addr, filter);
-	}
+	else
+		al->sym = NULL;
 }
 
 static void dso__calc_col_width(struct dso *self)
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index c06c13535a70..e35653c1817c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -48,6 +48,11 @@ static inline struct map *thread__find_map(struct thread *self,
 	return self ? map_groups__find(&self->mg, type, addr) : NULL;
 }
 
+void thread__find_addr_map(struct thread *self,
+			   struct perf_session *session, u8 cpumode,
+			   enum map_type type, u64 addr,
+			   struct addr_location *al);
+
 void thread__find_addr_location(struct thread *self,
 				struct perf_session *session, u8 cpumode,
 				enum map_type type, u64 addr,

From 88d3d9b7c843a42cb73c55a2d13cd1041da31fb9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 Jan 2010 23:45:30 -0200
Subject: [PATCH 068/640] perf buildid-list: Introduce --with-hits option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using this option 'perf buildid-list' will process all samples,
marking the DSOs that had some hits to list just them.

This in turn will be used by a new porcelain, 'perf archive',
that will be just a shell script to create a tarball from the
'perf buildid-list --with-hits' output and the files cached by
'perf record' in ~/.debug.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263519930-22803-4-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-buildid-list.c | 35 ++++++++++++++++++++++++++++++-
 tools/perf/util/symbol.c          | 11 ++++++----
 tools/perf/util/symbol.h          |  3 ++-
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 4229c2c213cc..431f204bde64 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -16,6 +16,7 @@
 
 static char const *input_name = "perf.data";
 static int force;
+static bool with_hits;
 
 static const char * const buildid_list_usage[] = {
 	"perf buildid-list [<options>]",
@@ -23,6 +24,7 @@ static const char * const buildid_list_usage[] = {
 };
 
 static const struct option options[] = {
+	OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -31,6 +33,34 @@ static const struct option options[] = {
 	OPT_END()
 };
 
+static int build_id_list__process_event(event_t *event,
+					struct perf_session *session)
+{
+	struct addr_location al;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread = perf_session__findnew(session, event->ip.pid);
+
+	if (thread == NULL) {
+		pr_err("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
+			      event->ip.ip, &al);
+
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
+	return 0;
+}
+
+static struct perf_event_ops build_id_list__event_ops = {
+	.sample	= build_id_list__process_event,
+	.mmap	= event__process_mmap,
+	.fork	= event__process_task,
+};
+
 static int __cmd_buildid_list(void)
 {
 	int err = -1;
@@ -40,7 +70,10 @@ static int __cmd_buildid_list(void)
 	if (session == NULL)
 		return -1;
 
-	dsos__fprintf_buildid(stdout);
+	if (with_hits)
+		perf_session__process_events(session, &build_id_list__event_ops);
+
+	dsos__fprintf_buildid(stdout, with_hits);
 
 	perf_session__delete(session);
 	return err;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 4267138c7bbe..a4e745934584 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1716,22 +1716,25 @@ void dsos__fprintf(FILE *fp)
 	__dsos__fprintf(&dsos__user, fp);
 }
 
-static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
+static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+				      bool with_hits)
 {
 	struct dso *pos;
 	size_t ret = 0;
 
 	list_for_each_entry(pos, head, node) {
+		if (with_hits && !pos->hit)
+			continue;
 		ret += dso__fprintf_buildid(pos, fp);
 		ret += fprintf(fp, " %s\n", pos->long_name);
 	}
 	return ret;
 }
 
-size_t dsos__fprintf_buildid(FILE *fp)
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits)
 {
-	return (__dsos__fprintf_buildid(&dsos__kernel, fp) +
-		__dsos__fprintf_buildid(&dsos__user, fp));
+	return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) +
+		__dsos__fprintf_buildid(&dsos__user, fp, with_hits));
 }
 
 static struct dso *dsos__create_kernel(const char *vmlinux)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 36b7c717f5ee..525085fd0735 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -97,6 +97,7 @@ struct dso {
 	u8		 slen_calculated:1;
 	u8		 has_build_id:1;
 	u8		 kernel:1;
+	u8		 hit:1;
 	unsigned char	 origin;
 	u8		 sorted_by_name;
 	u8		 loaded;
@@ -129,7 +130,7 @@ struct perf_session;
 int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 	      symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
-size_t dsos__fprintf_buildid(FILE *fp);
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
 
 size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
 size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);

From 460848fceffc91652b2d36d19db4ac40d12fb607 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 Jan 2010 13:17:51 -0200
Subject: [PATCH 069/640] perf symbols: The synthesized kernel modules MMAP
 must use the pathnames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we use ->long_name in dsos__find now.

Now 'perf buildid_list' is not duplicating those and managing to
show the proper build-ids for the DSOs with hits:

[root@doppio linux-2.6-tip]# perf buildid-list -H
74f9930ee94475b6b3238caf3725a50d59cb994b [kernel.kallsyms]
9ffdcac0a7935922d1f04b6cc9029dfef0f066ef /lib/modules/2.6.33-rc4-tip+/kernel/arch/x86/crypto/aes-x86_64.ko
3aaf89c32ebfc438ff546c93597d41788e3e65f3 /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwl3945.ko
19f46033f73e1ec612937189bb118c5daba5a0c8 /lib/modules/2.6.33-rc4-tip+/kernel/net/mac80211/mac80211.ko
1772f014a7a7272859655acb0c64a20ab20b75ee /lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko
eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1 /lib64/libc-2.10.2.so
5c68f7afeb33309c78037e374b0deee84dd441f6 /lib64/libpthread-2.10.2.so
e9c9ad5c138ef882e4507d2605645b597da43873 /bin/dbus-daemon
bcda7d09eb6c9ee380dae0ed3d591d4311decc31 /lib64/libdbus-1.so.3.4.0
7cc449a77f48b85d6088114000e970ced613bed8 /usr/lib64/libcrypto.so.0.9.8k
fdd1ccd1ff7917ab020653147ab3bacf0a85b5b9 /lib64/libglib-2.0.so.0.2000.5
e4417ebb8762e5f2eee93c8011a71115ff5edad8 /lib64/libgobject-2.0.so.0.2000.5
931e49461f6df99104f0febcc52f6fed5e2efce6 /usr/sbin/sshd
dab5f724c088f89fbd8304da553ed6cb30bbec96 /usr/lib64/libgdk-x11-2.0.so.0.1600.6
f2037a091ef36b591187a858d75e203690ea9409 /usr/sbin/openvpn
a8e4f743b40fb1fd8b85e2f9b88d93b661472b8f /bin/find
81120aada06e68b1e85882925a0fc6d7345ef59a /home/acme/bin/perf
[root@doppio linux-2.6-tip]#

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263568672-30323-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 5a6e827a09eb..966d207a1509 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -345,15 +345,15 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 
 			map = perf_session__new_module_map(session,
 							   self->mmap.start,
-							   short_module_name);
+							   self->mmap.filename);
 			if (map == NULL)
 				goto out_problem;
 
-			name = strdup(self->mmap.filename);
+			name = strdup(short_module_name);
 			if (name == NULL)
 				goto out_problem;
 
-			dso__set_long_name(map->dso, name);
+			map->dso->short_name = name;
 			map->end = map->start + self->mmap.len;
 		} else if (memcmp(self->mmap.filename, kmmap_prefix,
 				sizeof(kmmap_prefix) - 1) == 0) {

From 2c5851747bcf751908c02e253cb7582d342b4612 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 Jan 2010 13:17:52 -0200
Subject: [PATCH 070/640] perf archive: Add helper script to package files
 needed to do analysis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It uses 'perf buildid-list --with-hits' to create a tarball with
what is needed to have in the destination machine ~/.debug
hierarchy to properly decode the perf.data file specified.

Here is an example where a perf.data file collected on a x86-64
machine running Fedora 12 is used and then the data is packaged,
transferred and decoded on a PARISC64 machine running Debian
Testing, 32-bit userspace:

[root@doppio linux-2.6-tip]# uname -a
Linux doppio.ghostprotocols.net 2.6.33-rc4-tip+ #3 SMP Wed Jan 13 11:58:15 BRST 2010 x86_64 x86_64 x86_64 GNU/Linux
[root@doppio linux-2.6-tip]# perf archive
[root@doppio linux-2.6-tip]# ls -la perf.data*
-rw------- 1 root root  737696 2010-01-14 23:36 perf.data
-rw-r--r-- 1 root root 8840025 2010-01-15 12:27 perf.data.tar.bz2
[root@doppio linux-2.6-tip]# scp perf.data.* parisc64:.
Password:
perf.data.tar.bz2                                      100% 8633KB   1.4MB/s   00:06
[root@doppio linux-2.6-tip]# ssh parisc64
Password:
Linux parisc 2.6.19-g2bbf29ac-dirty #1 Sun Dec 3 17:24:04 BRST 2006 parisc64

The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.

Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
Last login: Thu Jan 14 11:23:24 2010 from d
parisc:~# uname -a
Linux parisc 2.6.19-g2bbf29ac-dirty #1 Sun Dec 3 17:24:04 BRST 2006 parisc64 GNU/Linux
parisc:~# mkdir .debug
parisc:~# tar xvf perf.data.tar.bz2 -C ~/.debug
tar: Record size = 8 blocks
.build-id/74/f9930ee94475b6b3238caf3725a50d59cb994b
[kernel.kallsyms]/74f9930ee94475b6b3238caf3725a50d59cb994b
.build-id/9f/fdcac0a7935922d1f04b6cc9029dfef0f066ef
lib/modules/2.6.33-rc4-tip+/kernel/arch/x86/crypto/aes-x86_64.ko/9ffdcac0a7935922d1f04b6cc9029dfef0f066ef
.build-id/3a/af89c32ebfc438ff546c93597d41788e3e65f3
lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/wireless/iwlwifi/iwl3945.ko/3aaf89c32ebfc438ff546c93597d41788e3e65f3
.build-id/19/f46033f73e1ec612937189bb118c5daba5a0c8
lib/modules/2.6.33-rc4-tip+/kernel/net/mac80211/mac80211.ko/19f46033f73e1ec612937189bb118c5daba5a0c8
.build-id/17/72f014a7a7272859655acb0c64a20ab20b75ee
lib/modules/2.6.33-rc4-tip+/kernel/drivers/net/e1000e/e1000e.ko/1772f014a7a7272859655acb0c64a20ab20b75ee
.build-id/eb/4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1
lib64/libc-2.10.2.so/eb4ec8fa8b2a5eb18cad173c92f27ed8887ed1c1
.build-id/5c/68f7afeb33309c78037e374b0deee84dd441f6
lib64/libpthread-2.10.2.so/5c68f7afeb33309c78037e374b0deee84dd441f6
.build-id/e9/c9ad5c138ef882e4507d2605645b597da43873
bin/dbus-daemon/e9c9ad5c138ef882e4507d2605645b597da43873
.build-id/bc/da7d09eb6c9ee380dae0ed3d591d4311decc31
lib64/libdbus-1.so.3.4.0/bcda7d09eb6c9ee380dae0ed3d591d4311decc31
.build-id/7c/c449a77f48b85d6088114000e970ced613bed8
usr/lib64/libcrypto.so.0.9.8k/7cc449a77f48b85d6088114000e970ced613bed8
.build-id/fd/d1ccd1ff7917ab020653147ab3bacf0a85b5b9
lib64/libglib-2.0.so.0.2000.5/fdd1ccd1ff7917ab020653147ab3bacf0a85b5b9
.build-id/e4/417ebb8762e5f2eee93c8011a71115ff5edad8
lib64/libgobject-2.0.so.0.2000.5/e4417ebb8762e5f2eee93c8011a71115ff5edad8
.build-id/93/1e49461f6df99104f0febcc52f6fed5e2efce6
usr/sbin/sshd/931e49461f6df99104f0febcc52f6fed5e2efce6
.build-id/da/b5f724c088f89fbd8304da553ed6cb30bbec96
usr/lib64/libgdk-x11-2.0.so.0.1600.6/dab5f724c088f89fbd8304da553ed6cb30bbec96
.build-id/f2/037a091ef36b591187a858d75e203690ea9409
usr/sbin/openvpn/f2037a091ef36b591187a858d75e203690ea9409
.build-id/a8/e4f743b40fb1fd8b85e2f9b88d93b661472b8f
bin/find/a8e4f743b40fb1fd8b85e2f9b88d93b661472b8f
.build-id/81/120aada06e68b1e85882925a0fc6d7345ef59a
home/acme/bin/perf/81120aada06e68b1e85882925a0fc6d7345ef59a
parisc:~# perf report 2> /dev/null | head -25
     9.07%             find  find                               [.] 0x0000000000fb0e
     3.29%             perf  libc-2.10.2.so                     [.] __GI_strcmp
     3.19%             find  [kernel.kallsyms]                  [k] _raw_spin_unlock_irqrestore
     2.70%             find  libc-2.10.2.so                     [.] __GI_memmove
     2.62%             perf  [kernel.kallsyms]                  [k] vsnprintf
     2.03%             find  libc-2.10.2.so                     [.] _int_malloc
     2.02%             perf  [kernel.kallsyms]                  [k] format_decode
     1.70%             find  [kernel.kallsyms]                  [k] n_tty_write
     1.70%             find  [kernel.kallsyms]                  [k] half_md4_transform
     1.67%             find  libc-2.10.2.so                     [.] _IO_vfprintf_internal
     1.66%             perf  [kernel.kallsyms]                  [k] audit_free_aux
     1.62%          swapper  [kernel.kallsyms]                  [k] mwait_idle_with_hints
     1.58%             find  [kernel.kallsyms]                  [k] __kmalloc
     1.35%             find  [kernel.kallsyms]                  [k] sched_clock_local
     1.35%             find  [kernel.kallsyms]                  [k] ext4_check_dir_entry
     1.35%             find  [kernel.kallsyms]                  [k] ext4_htree_store_dirent
     1.35%             find  [kernel.kallsyms]                  [k] sys_write
     1.35%             find  [e1000e]                           [k] e1000_clean
     1.35%             find  [kernel.kallsyms]                  [k] _atomic_dec_and_lock
     1.34%             find  [kernel.kallsyms]                  [k] __d_lookup
parisc:~#

Probably the next step is to have 'perf report' notice that there is a
perf.data.tar.bz2 file in the same directory and look if it was already
added to ~/.debug/.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263568672-30323-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile         | 10 ++--------
 tools/perf/command-list.txt |  1 +
 tools/perf/perf-archive.sh  | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 8 deletions(-)
 create mode 100644 tools/perf/perf-archive.sh

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2c03a9411317..d739552036d0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -286,11 +286,7 @@ SCRIPT_PERL =
 SCRIPT_SH =
 TEST_PROGRAMS =
 
-#
-# No scripts right now:
-#
-
-# SCRIPT_SH += perf-am.sh
+SCRIPT_SH += perf-archive.sh
 
 #
 # No Perl scripts right now:
@@ -315,9 +311,7 @@ PROGRAMS += perf
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
 #
-# None right now:
-#
-# BUILT_INS += perf-init $X
+BUILT_INS += perf-archive
 
 # what 'all' will build and 'install' will install, in perfexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 71dc7c3fe7b2..f73d1d90f5bd 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,6 +3,7 @@
 # command name			category [deprecated] [common]
 #
 perf-annotate			mainporcelain common
+perf-archive			mainporcelain
 perf-bench			mainporcelain common
 perf-buildid-list		mainporcelain common
 perf-diff			mainporcelain common
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
new file mode 100644
index 000000000000..45fbe2f07b15
--- /dev/null
+++ b/tools/perf/perf-archive.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# perf archive
+# Arnaldo Carvalho de Melo <acme@redhat.com>
+
+PERF_DATA=perf.data
+if [ $# -ne 0 ] ; then
+	PERF_DATA=$1
+fi
+
+DEBUGDIR=~/.debug/
+BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+
+perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
+if [ ! -s $BUILDIDS ] ; then
+	echo "perf archive: no build-ids found"
+	rm -f $BUILDIDS
+	exit 1
+fi
+
+MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
+
+cut -d ' ' -f 1 $BUILDIDS | \
+while read build_id ; do
+	linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2}
+	filename=$(readlink -f $linkname)
+	echo ${linkname#$DEBUGDIR} >> $MANIFEST
+	echo ${filename#$DEBUGDIR} >> $MANIFEST
+done
+
+tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST
+rm -f $MANIFEST $BUILDIDS
+exit 0

From f5a2c3dce03621b55f84496f58adc2d1a87ca16f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 Jan 2010 18:08:26 -0200
Subject: [PATCH 071/640] perf record: Intercept all events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The event interception we need to do in 'perf record' to create
a list of all DSOs in PERF_RECORD_MMAP events wasn't seeing all
events, make sure that happens by checking size agains
event_t->header.size.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263586107-1756-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 614fa9a4c67c..7bb9ca1b30fa 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -113,16 +113,24 @@ static void write_output(void *buf, size_t size)
 
 static void write_event(event_t *buf, size_t size)
 {
-	/*
-	* Add it to the list of DSOs, so that when we finish this
-	 * record session we can pick the available build-ids.
-	 */
-	if (buf->header.type == PERF_RECORD_MMAP) {
-		struct list_head *head = &dsos__user;
-		if (buf->mmap.header.misc == 1)
-			head = &dsos__kernel;
-		__dsos__findnew(head, buf->mmap.filename);
-	}
+	size_t processed_size = buf->header.size;
+	event_t *ev = buf;
+
+	do {
+		/*
+		* Add it to the list of DSOs, so that when we finish this
+		 * record session we can pick the available build-ids.
+		 */
+		if (ev->header.type == PERF_RECORD_MMAP) {
+			struct list_head *head = &dsos__user;
+			if (ev->header.misc == 1)
+				head = &dsos__kernel;
+			__dsos__findnew(head, ev->mmap.filename);
+		}
+
+		ev = ((void *)ev) + ev->header.size;
+		processed_size += ev->header.size;
+	} while (processed_size < size);
 
 	write_output(buf, size);
 }

From 881516eb828a3f7276c378bcef96b7788fc99016 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 Jan 2010 18:08:27 -0200
Subject: [PATCH 072/640] perf symbols: Accept an alias when looking for
 "_text"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As it is in PARISC64:

parisc:~# uname -a
Linux parisc 2.6.33-rc4-tip+ #1 SMP Thu Jan 14 13:33:34 BRST
2010 parisc64 GNU/Linux parisc:~# grep -w _text /proc/kallsyms
0000000040100000 A _text
parisc:~# grep 0000000040100000 /proc/kallsyms
0000000040100000 T stext
0000000040100000 T _stext
0000000040100000 A _text
parisc:~#

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263586107-1756-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 966d207a1509..dc13cad828d7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -227,7 +227,12 @@ static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
 {
 	struct process_symbol_args *args = arg;
 
-	if (!symbol_type__is_a(type, MAP__FUNCTION) || strcmp(name, args->name))
+	/*
+	 * Must be a function or at least an alias, as in PARISC64, where "_text" is
+	 * an 'A' to the same address as "_stext".
+	 */
+	if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+	      type == 'A') || strcmp(name, args->name))
 		return 0;
 
 	args->start = start;

From 889ff0150661512d79484219612b7e2e024b6c07 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 9 Jan 2010 20:04:47 +0100
Subject: [PATCH 073/640] perf/core: Split context's event group list into
 pinned and non-pinned lists

Split-up struct perf_event_context::group_list into pinned_groups
and flexible_groups (non-pinned).

This first appears to be useless as it duplicates various loops around
the group list handlings.

But it scales better in the fast-path in perf_sched_in(). We don't
anymore iterate twice through the entire list to separate pinned and
non-pinned scheduling. Instead we interate through two distinct lists.

The another desired effect is that it makes easier to define distinct
scheduling rules on both.

Changes in v2:
- Respectively rename pinned_grp_list and
  volatile_grp_list into pinned_groups and flexible_groups as per
  Ingo suggestion.
- Various cleanups

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/perf_event.h |   3 +-
 kernel/perf_event.c        | 231 ++++++++++++++++++++++++-------------
 2 files changed, 155 insertions(+), 79 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a1d276db754..cdbc2aa64a0b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,7 +683,8 @@ struct perf_event_context {
 	 */
 	struct mutex			mutex;
 
-	struct list_head		group_list;
+	struct list_head		pinned_groups;
+	struct list_head		flexible_groups;
 	struct list_head		event_list;
 	int				nr_events;
 	int				nr_active;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27f69a04541d..c9f8a757649d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_enable();
@@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
@@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_groups);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->flexible_groups);
 		break;
 	}
 	perf_enable();
@@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr)
 		perf_event_task_sched_in(curr);
 }
 
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5041,7 +5080,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5049,6 +5089,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5063,23 +5121,15 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
 
-		if (WARN_ON_ONCE(!parent))
-			continue;
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
 
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
-
-		fput(parent->filp);
-
-		list_del_event(event, ctx);
-		free_event(event);
-	}
-
-	if (!list_empty(&ctx->group_list))
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&ctx->mutex);
@@ -5087,12 +5137,54 @@ again:
 	put_ctx(ctx);
 }
 
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
+	}
+
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
+
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
+
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
+}
+
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
-
-		ret = inherit_group(event, parent, parent_ctx,
-					     child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
 			break;
-		}
 	}
 
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
+
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)

From 5908cdc85eb30f8d07f2cb11d4a62334d7229048 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 9 Jan 2010 20:53:14 +0100
Subject: [PATCH 074/640] list: Introduce list_rotate_left()

Bring a new list_rotate_left() helper that rotates a list to
the left. This is useful for codes that need to round roubin
elements which queue priority increases from tail to head.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/list.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/list.h b/include/linux/list.h
index 969f6e92d089..5d9c6558e8ab 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -205,6 +205,20 @@ static inline int list_empty_careful(const struct list_head *head)
 	return (next == head) && (next == head->prev);
 }
 
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left(struct list_head *head)
+{
+	struct list_head *first;
+
+	if (!list_empty(head)) {
+		first = head->next;
+		list_move_tail(first, head);
+	}
+}
+
 /**
  * list_is_singular - tests whether a list has just one entry.
  * @head: the list to test.

From e286417378b4f9ce6e473b556193465ab22e12ab Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 9 Jan 2010 21:05:28 +0100
Subject: [PATCH 075/640] perf: Round robin flexible groups of events using
 list_rotate_left()

This is more proper that doing it through a list_for_each_entry()
that breaks after the first entry.

v2: Don't rotate pinned groups as its not needed to time share
them.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 kernel/perf_event.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index c9f8a757649d..bbebe2832639 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1454,25 +1454,16 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
  */
 static void rotate_ctx(struct perf_event_context *ctx)
 {
-	struct perf_event *event;
-
 	if (!ctx->nr_events)
 		return;
 
 	raw_spin_lock(&ctx->lock);
-	/*
-	 * Rotate the first entry last (works just fine for group events too):
-	 */
-	perf_disable();
-	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->pinned_groups);
-		break;
-	}
 
-	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->flexible_groups);
-		break;
-	}
+	/* Rotate the first entry last of non-pinned groups */
+	perf_disable();
+
+	list_rotate_left(&ctx->flexible_groups);
+
 	perf_enable();
 
 	raw_spin_unlock(&ctx->lock);

From d6f962b57bfaab62891c7abbf1469212a56d6103 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 10 Jan 2010 01:25:51 +0100
Subject: [PATCH 076/640] perf: Export software-only event group characteristic
 as a flag

Before scheduling an event group, we first check if a group can go
on. We first check if the group is made of software only events
first, in which case it is enough to know if the group can be
scheduled in.

For that purpose, we iterate through the whole group, which is
wasteful as we could do this check when we add/delete an event to
a group.

So we create a group_flags field in perf event that can host
characteristics from a group of events, starting with a first
PERF_GROUP_SOFTWARE flag that reduces the check on the fast path.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/perf_event.h |  5 +++++
 kernel/perf_event.c        | 30 +++++++++++-------------------
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cdbc2aa64a0b..c6f812e4d058 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
 					struct perf_sample_data *,
 					struct pt_regs *regs);
 
+enum perf_group_flag {
+	PERF_GROUP_SOFTWARE = 0x1,
+};
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -574,6 +578,7 @@ struct perf_event {
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
 	int				nr_siblings;
+	int				group_flags;
 	struct perf_event		*group_leader;
 	struct perf_event		*output;
 	const struct pmu		*pmu;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bbebe2832639..eae6ff693604 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -315,9 +315,16 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (group_leader == event) {
 		struct list_head *list;
 
+		if (is_software_event(event))
+			event->group_flags |= PERF_GROUP_SOFTWARE;
+
 		list = ctx_group_list(event, ctx);
 		list_add_tail(&event->group_entry, list);
 	} else {
+		if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+		    !is_software_event(event))
+			group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -372,6 +379,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		list = ctx_group_list(event, ctx);
 		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
+
+		/* Inherit group flags from the previous leader */
+		sibling->group_flags = event->group_flags;
 	}
 }
 
@@ -699,24 +709,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
-	struct perf_event *event;
-
-	if (!is_software_event(leader))
-		return 0;
-
-	list_for_each_entry(event, &leader->sibling_list, group_entry)
-		if (!is_software_event(event))
-			return 0;
-
-	return 1;
-}
-
 /*
  * Work out whether we can put this event group on the CPU now.
  */
@@ -727,7 +719,7 @@ static int group_can_go_on(struct perf_event *event,
 	/*
 	 * Groups consisting entirely of software events can always go on.
 	 */
-	if (is_software_only_group(event))
+	if (event->group_flags & PERF_GROUP_SOFTWARE)
 		return 1;
 	/*
 	 * If an exclusive group is already on, no other hardware

From 69e3f52d1b1a3ed4390bb8a09bb1324265af7fbf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 16 Jan 2010 14:21:15 +0100
Subject: [PATCH 077/640] perf: Fix implicit declaration of getline in util.c

getline() is considered as undeclared in util/util.c because
it includes string.h, that in turn includes stdio.h, without
having defined _GNU_SOURCE.

But util.c also includes util.h that handles the _GNU_SOURCE and
all the needed inclusions already. Let's include only util.h
and sys/mman.h which is the only one header not handled by
util.h

This fixes the following build error:

 util/util.c: In function 'slow_copyfile':
 util/util.c:49: erreur: implicit declaration of function
 'getline' util/util.c:49: erreur: nested extern declaration of 'getline'

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263648075-3858-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/util.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index f0685849b244..f9b890fde681 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -1,10 +1,5 @@
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
 #include "util.h"
+#include <sys/mman.h>
 
 int mkdir_p(char *path, mode_t mode)
 {

From 0eda7385db1f30271ade830a231006938a76fb53 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 16 Jan 2010 21:31:16 +0900
Subject: [PATCH 078/640] perf probe: Fix build error of builtin-probe.c

I got this build error when building tip tree:

| cc1: warnings being treated as errors
| builtin-probe.c:123: error: 'opt_show_lines' defined but not used

This error is caused by:

| #ifndef NO_LIBDWARF
|	OPT_CALLBACK('L', "line", NULL,
|		     "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
|		     "Show source code lines.", opt_show_lines),
| #endif

My environment defines NO_LIBDWARF, so gcc treated
opt_show_lines() as garbage. So I moved opt_show_lines() into
#ifndef NO_LIBDWARF ... #endif block.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <1263645076-9993-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-probe.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 1d3a99ea5ce1..34f2acb1cc88 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -118,15 +118,6 @@ static int opt_del_probe_event(const struct option *opt __used,
 	return 0;
 }
 
-static int opt_show_lines(const struct option *opt __used,
-			  const char *str, int unset __used)
-{
-	if (str)
-		parse_line_range_desc(str, &session.line_range);
-	INIT_LIST_HEAD(&session.line_range.line_list);
-	session.show_lines = true;
-	return 0;
-}
 /* Currently just checking function name from symbol map */
 static void evaluate_probe_point(struct probe_point *pp)
 {
@@ -148,6 +139,16 @@ static int open_vmlinux(void)
 	pr_debug("Try to open %s\n", session.kmap->dso->long_name);
 	return open(session.kmap->dso->long_name, O_RDONLY);
 }
+
+static int opt_show_lines(const struct option *opt __used,
+			  const char *str, int unset __used)
+{
+	if (str)
+		parse_line_range_desc(str, &session.line_range);
+	INIT_LIST_HEAD(&session.line_range.line_list);
+	session.show_lines = true;
+	return 0;
+}
 #endif
 
 static const char * const probe_usage[] = {

From 231e36f4d2e63dd770db80b9f5113310c2bcfcfd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 14 Jan 2010 00:12:12 -0500
Subject: [PATCH 079/640] tracing/kprobe: Update kprobe tracing self test for
 new syntax

Update kprobe tracing self test for new syntax (it supports
deleting individual probes, and drops $argN support)
and behavior change (new probes are disabled in default).

This selftest includes the following checks:

 - Adding function-entry probe and return probe with arguments.
 - Enabling these probes.
 - Deleting it individually.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100114051211.7814.29436.stgit@localhost6.localdomain6>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_kprobe.c | 55 +++++++++++++++++++++++++++++++------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ac728ded964..d6266cad6953 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1507,28 +1507,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
 
 static __init int kprobe_trace_self_tests_init(void)
 {
-	int ret;
+	int ret, warn = 0;
 	int (*target)(int, int, int, int, int, int);
+	struct trace_probe *tp;
 
 	target = kprobe_trace_selftest_target;
 
 	pr_info("Testing kprobe tracing: ");
 
 	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
-				  "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
-	if (WARN_ON_ONCE(ret))
-		pr_warning("error enabling function entry\n");
+				  "$stack $stack0 +0($stack)");
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error on probing function entry.\n");
+		warn++;
+	} else {
+		/* Enable trace point */
+		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+		if (WARN_ON_ONCE(tp == NULL)) {
+			pr_warning("error on getting new probe.\n");
+			warn++;
+		} else
+			probe_event_enable(&tp->call);
+	}
 
 	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
 				  "$retval");
-	if (WARN_ON_ONCE(ret))
-		pr_warning("error enabling function return\n");
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error on probing function return.\n");
+		warn++;
+	} else {
+		/* Enable trace point */
+		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+		if (WARN_ON_ONCE(tp == NULL)) {
+			pr_warning("error on getting new probe.\n");
+			warn++;
+		} else
+			probe_event_enable(&tp->call);
+	}
+
+	if (warn)
+		goto end;
 
 	ret = target(1, 2, 3, 4, 5, 6);
 
-	cleanup_all_probes();
+	ret = command_trace_probe("-:testprobe");
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error on deleting a probe.\n");
+		warn++;
+	}
 
-	pr_cont("OK\n");
+	ret = command_trace_probe("-:testprobe2");
+	if (WARN_ON_ONCE(ret)) {
+		pr_warning("error on deleting a probe.\n");
+		warn++;
+	}
+
+end:
+	cleanup_all_probes();
+	if (warn)
+		pr_cont("NG: Some tests are failed. Please check them.\n");
+	else
+		pr_cont("OK\n");
 	return 0;
 }
 

From 42cce92f4ddfa41e2dfe26fdcad4887943c032f2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 17 Jan 2010 10:36:08 +0100
Subject: [PATCH 080/640] perf: Make __perf_event_sched_out static

__perf_event_sched_out doesn't need to be globally available, make
it static.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 kernel/perf_event.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eae6ff693604..c4e90b8cd60d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1049,8 +1049,8 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }
 
-void __perf_event_sched_out(struct perf_event_context *ctx,
-			      struct perf_cpu_context *cpuctx)
+static void __perf_event_sched_out(struct perf_event_context *ctx,
+				   struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
 

From 5b0311e1f2464547fc6f17a82d7ea2538c8c7a70 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 17 Jan 2010 11:59:13 +0100
Subject: [PATCH 081/640] perf: Allow pinned and flexible groups to be
 scheduled separately

Tune the scheduling helpers so that we can choose to schedule either
pinned and/or flexible groups from a context.

And while at it, refactor a bit the naming of these helpers to make
these more consistent and flexible.

There is no (intended) change in scheduling behaviour in this
patch.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 kernel/perf_event.c | 137 ++++++++++++++++++++++++++++++--------------
 1 file changed, 93 insertions(+), 44 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index c4e90b8cd60d..bfc4ee015c87 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1049,8 +1049,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }
 
-static void __perf_event_sched_out(struct perf_event_context *ctx,
-				   struct perf_cpu_context *cpuctx)
+enum event_type_t {
+	EVENT_FLEXIBLE = 0x1,
+	EVENT_PINNED = 0x2,
+	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
+static void ctx_sched_out(struct perf_event_context *ctx,
+			  struct perf_cpu_context *cpuctx,
+			  enum event_type_t event_type)
 {
 	struct perf_event *event;
 
@@ -1061,13 +1068,18 @@ static void __perf_event_sched_out(struct perf_event_context *ctx,
 	update_context_time(ctx);
 
 	perf_disable();
-	if (ctx->nr_active) {
+	if (!ctx->nr_active)
+		goto out_enable;
+
+	if (event_type & EVENT_PINNED)
 		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 
+	if (event_type & EVENT_FLEXIBLE)
 		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
-	}
+
+ out_enable:
 	perf_enable();
  out:
 	raw_spin_unlock(&ctx->lock);
@@ -1229,15 +1241,13 @@ void perf_event_task_sched_out(struct task_struct *task,
 	rcu_read_unlock();
 
 	if (do_switch) {
-		__perf_event_sched_out(ctx, cpuctx);
+		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
 		cpuctx->task_ctx = NULL;
 	}
 }
 
-/*
- * Called with IRQs disabled
- */
-static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_event_context *ctx,
+			       enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
@@ -1247,39 +1257,34 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx)
 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
 		return;
 
-	__perf_event_sched_out(ctx, cpuctx);
+	ctx_sched_out(ctx, cpuctx, event_type);
 	cpuctx->task_ctx = NULL;
 }
 
 /*
  * Called with IRQs disabled
  */
-static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
 {
-	__perf_event_sched_out(&cpuctx->ctx, cpuctx);
+	task_ctx_sched_out(ctx, EVENT_ALL);
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			      enum event_type_t event_type)
+{
+	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
 }
 
 static void
-__perf_event_sched_in(struct perf_event_context *ctx,
-			struct perf_cpu_context *cpuctx)
+ctx_pinned_sched_in(struct perf_event_context *ctx,
+		    struct perf_cpu_context *cpuctx,
+		    int cpu)
 {
-	int cpu = smp_processor_id();
 	struct perf_event *event;
-	int can_add_hw = 1;
 
-	raw_spin_lock(&ctx->lock);
-	ctx->is_active = 1;
-	if (likely(!ctx->nr_events))
-		goto out;
-
-	ctx->timestamp = perf_clock();
-
-	perf_disable();
-
-	/*
-	 * First go through the list and put on any pinned groups
-	 * in order to give them the best chance of going on.
-	 */
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
@@ -1298,6 +1303,15 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 			event->state = PERF_EVENT_STATE_ERROR;
 		}
 	}
+}
+
+static void
+ctx_flexible_sched_in(struct perf_event_context *ctx,
+		      struct perf_cpu_context *cpuctx,
+		      int cpu)
+{
+	struct perf_event *event;
+	int can_add_hw = 1;
 
 	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
 		/* Ignore events in OFF or ERROR state */
@@ -1314,11 +1328,53 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 			if (group_sched_in(event, cpuctx, ctx, cpu))
 				can_add_hw = 0;
 	}
+}
+
+static void
+ctx_sched_in(struct perf_event_context *ctx,
+	     struct perf_cpu_context *cpuctx,
+	     enum event_type_t event_type)
+{
+	int cpu = smp_processor_id();
+
+	raw_spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	if (likely(!ctx->nr_events))
+		goto out;
+
+	ctx->timestamp = perf_clock();
+
+	perf_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	if (event_type & EVENT_PINNED)
+		ctx_pinned_sched_in(ctx, cpuctx, cpu);
+
+	/* Then walk through the lower prio flexible groups */
+	if (event_type & EVENT_FLEXIBLE)
+		ctx_flexible_sched_in(ctx, cpuctx, cpu);
+
 	perf_enable();
  out:
 	raw_spin_unlock(&ctx->lock);
 }
 
+static void task_ctx_sched_in(struct task_struct *task,
+			      enum event_type_t event_type)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+
+	if (likely(!ctx))
+		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
+	ctx_sched_in(ctx, cpuctx, event_type);
+	cpuctx->task_ctx = ctx;
+}
 /*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
@@ -1332,22 +1388,15 @@ __perf_event_sched_in(struct perf_event_context *ctx,
  */
 void perf_event_task_sched_in(struct task_struct *task)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_event_context *ctx = task->perf_event_ctxp;
-
-	if (likely(!ctx))
-		return;
-	if (cpuctx->task_ctx == ctx)
-		return;
-	__perf_event_sched_in(ctx, cpuctx);
-	cpuctx->task_ctx = ctx;
+	task_ctx_sched_in(task, EVENT_ALL);
 }
 
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx)
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+			     enum event_type_t event_type)
 {
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
-	__perf_event_sched_in(ctx, cpuctx);
+	ctx_sched_in(ctx, cpuctx, event_type);
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -1476,17 +1525,17 @@ void perf_event_task_tick(struct task_struct *curr)
 	if (ctx)
 		perf_ctx_adjust_freq(ctx);
 
-	perf_event_cpu_sched_out(cpuctx);
+	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
 	if (ctx)
-		__perf_event_task_sched_out(ctx);
+		task_ctx_sched_out(ctx, EVENT_ALL);
 
 	rotate_ctx(&cpuctx->ctx);
 	if (ctx)
 		rotate_ctx(ctx);
 
-	perf_event_cpu_sched_in(cpuctx);
+	cpu_ctx_sched_in(cpuctx, EVENT_ALL);
 	if (ctx)
-		perf_event_task_sched_in(curr);
+		task_ctx_sched_in(curr, EVENT_ALL);
 }
 
 static int event_enable_on_exec(struct perf_event *event,

From 7defb0f879bbcfe29e3c6f29d685d4f29b7a0700 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 17 Jan 2010 12:15:31 +0100
Subject: [PATCH 082/640] perf: Don't schedule out/in pinned events on task
 tick

We don't need to schedule in/out pinned events on task tick,
now that pinned and flexible groups can be scheduled separately.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 kernel/perf_event.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bfc4ee015c87..a90ae694cbc1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1525,17 +1525,17 @@ void perf_event_task_tick(struct task_struct *curr)
 	if (ctx)
 		perf_ctx_adjust_freq(ctx);
 
-	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
-		task_ctx_sched_out(ctx, EVENT_ALL);
+		task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
 
 	rotate_ctx(&cpuctx->ctx);
 	if (ctx)
 		rotate_ctx(ctx);
 
-	cpu_ctx_sched_in(cpuctx, EVENT_ALL);
+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
-		task_ctx_sched_in(curr, EVENT_ALL);
+		task_ctx_sched_in(curr, EVENT_FLEXIBLE);
 }
 
 static int event_enable_on_exec(struct perf_event *event,

From 329c0e012b99fa2325a0be205c052e4aba690f16 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 17 Jan 2010 12:56:05 +0100
Subject: [PATCH 083/640] perf: Better order flexible and pinned scheduling

When a task gets scheduled in. We don't touch the cpu bound events
so the priority order becomes:

	cpu pinned, cpu flexible, task pinned, task flexible.

So schedule out cpu flexibles when a new task context gets in
and correctly order the groups to schedule in:

	task pinned, cpu flexible, task flexible.

Cpu pinned groups don't need to be touched at this time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 kernel/perf_event.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a90ae694cbc1..edc46b92b508 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1362,6 +1362,14 @@ ctx_sched_in(struct perf_event_context *ctx,
 	raw_spin_unlock(&ctx->lock);
 }
 
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+			     enum event_type_t event_type)
+{
+	struct perf_event_context *ctx = &cpuctx->ctx;
+
+	ctx_sched_in(ctx, cpuctx, event_type);
+}
+
 static void task_ctx_sched_in(struct task_struct *task,
 			      enum event_type_t event_type)
 {
@@ -1388,15 +1396,27 @@ static void task_ctx_sched_in(struct task_struct *task,
  */
 void perf_event_task_sched_in(struct task_struct *task)
 {
-	task_ctx_sched_in(task, EVENT_ALL);
-}
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
 
-static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
-			     enum event_type_t event_type)
-{
-	struct perf_event_context *ctx = &cpuctx->ctx;
+	if (likely(!ctx))
+		return;
 
-	ctx_sched_in(ctx, cpuctx, event_type);
+	if (cpuctx->task_ctx == ctx)
+		return;
+
+	/*
+	 * We want to keep the following priority order:
+	 * cpu pinned (that don't need to move), task pinned,
+	 * cpu flexible, task flexible.
+	 */
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+
+	ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+
+	cpuctx->task_ctx = ctx;
 }
 
 #define MAX_INTERRUPTS (~0ULL)

From 580d9e00fdfb85e65c5097dcd739c6efcdbadc96 Mon Sep 17 00:00:00 2001
From: Motohiro KOSAKI <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 18 Jan 2010 21:35:05 -0500
Subject: [PATCH 084/640] kprobetrace, doc: Shell needs single quote to use $
 character

Shell interprets $val as shell variable, thus we need quote if
we use the echo command.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
LKML-Reference: <20100119023505.31880.17367.stgit@localhost6.localdomain6>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/trace/kprobetrace.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index f30978e001f8..ab57f02e53bb 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -79,7 +79,7 @@ Usage examples
 To add a probe as a new event, write a new definition to kprobe_events
 as below.
 
-  echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events
+  echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
 
  This sets a kprobe on the top of do_sys_open() function with recording
 1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
@@ -88,7 +88,7 @@ the ABI, please try to use probe subcommand of perf-tools (you can find it
 under tools/perf/).
 As this example shows, users can choose more familiar names for each arguments.
 
-  echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+  echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
 
  This sets a kretprobe on the return point of do_sys_open() function with
 recording return value as "myretprobe" event.

From df3ab708b787a2b35de5101452bd51d4a8ae0ded Mon Sep 17 00:00:00 2001
From: Motohiro KOSAKI <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 18 Jan 2010 21:35:12 -0500
Subject: [PATCH 085/640] kprobetrace, doc: Add the explanation to remove probe
 points

Latest kprobetrace can remove probe points selectively, thus
the documentation should be updated too.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
LKML-Reference: <20100119023512.31880.35535.stgit@localhost6.localdomain6>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/trace/kprobetrace.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ab57f02e53bb..a9100b28eb84 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -24,6 +24,7 @@ Synopsis of kprobe_events
 -------------------------
   p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]	: Set a probe
   r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]		: Set a return probe
+  -:[GRP/]EVENT						: Clear a probe
 
  GRP		: Group name. If omitted, use "kprobes" for it.
  EVENT		: Event name. If omitted, the event name is generated
@@ -122,6 +123,12 @@ REC->dfd, REC->filename, REC->flags, REC->mode
 
  This clears all probe points.
 
+ Or,
+
+  echo -:myprobe >> kprobe_events
+
+ This clears probe points selectively.
+
  Right after definition, each event is disabled by default. For tracing these
 events, you need to enable it.
 

From e8d433f335d44028d41af231ef5c52fd8a9b280b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 Jan 2010 15:59:19 -0200
Subject: [PATCH 086/640] perf archive: Add documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also makes it appear on the 'perf --help' output, i.e.
util/generate-cmdlist.sh now takes it into account.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263837559-24168-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-archive.txt | 22 ++++++++++++++++++++++
 tools/perf/command-list.txt               |  2 +-
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/Documentation/perf-archive.txt

diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
new file mode 100644
index 000000000000..fae174dc7d01
--- /dev/null
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
+perf-archive(1)
+===============
+
+NAME
+----
+perf-archive - Create archive with object files with build-ids found in perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf archive' [file]
+
+DESCRIPTION
+-----------
+This command runs runs perf-buildid-list --with-hits, and collects the files
+with the buildids found so that analisys of perf.data contents can be possible
+on another machine.
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index f73d1d90f5bd..cf6444dfd73a 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,7 +3,7 @@
 # command name			category [deprecated] [common]
 #
 perf-annotate			mainporcelain common
-perf-archive			mainporcelain
+perf-archive			mainporcelain common
 perf-bench			mainporcelain common
 perf-buildid-list		mainporcelain common
 perf-diff			mainporcelain common

From d5526d8cb8e5aa3349c1ff4e409ad9b4cdac380c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 Jan 2010 18:21:42 -0200
Subject: [PATCH 087/640] perf archive: Fix installation steps in the Makefile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix these warning:

acme@parisc:~/git/linux-2.6-tip$ make -C tools/perf/ install
make: Entering directory
`/home/acme/git/linux-2.6-tip/tools/perf' Makefile:833: warning:
overriding commands for target `perf-archive' Makefile:822:
warning: ignoring old commands for target `perf-archive'

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263846102-24841-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d739552036d0..ddbeeee9ade2 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -311,7 +311,6 @@ PROGRAMS += perf
 # List built-in command $C whose implementation cmd_$C() is not in
 # builtin-$C.o but is linked in as part of some other command.
 #
-BUILT_INS += perf-archive
 
 # what 'all' will build and 'install' will install, in perfexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
@@ -1004,6 +1003,7 @@ install: all
 	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
 	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'

From f162f87ad6e98e8bfb2362955da46bed7b2514be Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 Jan 2010 10:36:13 -0200
Subject: [PATCH 088/640] perf symbols: Set dso->kernel when handling the fake
 vmlinux MMAP event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because it may be possible that there was no buildid section,
where we would set this to 1.

Found while analysing a perf.data file collected on an ARM
machine where an explicitely specified vmlinux was being
disregarded.

Reported-by: Jamie Iles <jamie.iles@picochip.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263904574-30732-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dc13cad828d7..bbaee61c1683 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -373,6 +373,7 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 			if (kernel == NULL)
 				goto out_problem;
 
+			kernel->kernel = 1;
 			if (__map_groups__create_kernel_maps(&session->kmaps,
 							     session->vmlinux_maps,
 							     kernel) < 0)

From dc8d6ab2b61a2d92b5d7438565ccd20b29724cb2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 Jan 2010 10:36:14 -0200
Subject: [PATCH 089/640] perf symbols: Use only --vmlinux if specified
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Found while analysing a perf.data file collected on an ARM
machine where an explicitely specified vmlinux was being
disregarded.

Reported-by: Jamie Iles <jamie.iles@picochip.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263904574-30732-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 75 ++++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a4e745934584..b6ab23dd5f9f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1572,7 +1572,7 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
 		return -1;
 
 	dso__set_loaded(self, map->type);
-	err = dso__load_sym(self, map, session, self->long_name, fd, filter, 1, 0);
+	err = dso__load_sym(self, map, session, vmlinux, fd, filter, 1, 0);
 	close(fd);
 
 	return err;
@@ -1584,6 +1584,26 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	int err;
 	const char *kallsyms_filename = NULL;
 	char *kallsyms_allocated_filename = NULL;
+	/*
+	 * Step 1: if the user specified a vmlinux filename, use it and only
+	 * it, reporting errors to the user if it cannot be used.
+	 *
+	 * For instance, try to analyse an ARM perf.data file _without_ a
+	 * build-id, or if the user specifies the wrong path to the right
+	 * vmlinux file, obviously we can't fallback to another vmlinux (a
+	 * x86_86 one, on the machine where analysis is being performed, say),
+	 * or worse, /proc/kallsyms.
+	 *
+	 * If the specified file _has_ a build-id and there is a build-id
+	 * section in the perf.data file, we will still do the expected
+	 * validation in dso__load_vmlinux and will bail out if they don't
+	 * match.
+	 */
+	if (symbol_conf.vmlinux_name != NULL) {
+		err = dso__load_vmlinux(self, map, session,
+					symbol_conf.vmlinux_name, filter);
+		goto out_try_fixup;
+	}
 
 	if (vmlinux_path != NULL) {
 		int i;
@@ -1618,46 +1638,41 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 				goto do_kallsyms;
 			}
 		}
-
+		/*
+		 * Now look if we have it on the build-id cache in
+		 * $HOME/.debug/[kernel.kallsyms].
+		 */
 		build_id__sprintf(self->build_id, sizeof(self->build_id),
 				  sbuild_id);
 
 		if (asprintf(&kallsyms_allocated_filename,
 			     "%s/.debug/[kernel.kallsyms]/%s",
-			     getenv("HOME"), sbuild_id) != -1) {
-			if (access(kallsyms_filename, F_OK)) {
-				kallsyms_filename = kallsyms_allocated_filename;
-				goto do_kallsyms;
-			}
-			free(kallsyms_allocated_filename);
-			kallsyms_allocated_filename = NULL;
-		}
-
-		goto do_vmlinux;
-	}
-
-	if (self->long_name[0] == '[') {
-		kallsyms_filename = "/proc/kallsyms";
-		goto do_kallsyms;
-	}
-
-do_vmlinux:
-	err = dso__load_vmlinux(self, map, session, self->long_name, filter);
-	if (err <= 0) {
-		if (self->has_build_id)
+			     getenv("HOME"), sbuild_id) == -1)
 			return -1;
 
-		pr_info("The file %s cannot be used, "
-			"trying to use /proc/kallsyms...", self->long_name);
-do_kallsyms:
-		err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
-		if (err > 0 && kallsyms_filename == NULL)
-                        dso__set_long_name(self, strdup("[kernel.kallsyms]"));
-		free(kallsyms_allocated_filename);
+		if (access(kallsyms_filename, F_OK)) {
+			free(kallsyms_allocated_filename);
+			return -1;
+		}
+
+		kallsyms_filename = kallsyms_allocated_filename;
+	} else {
+		/*
+		 * Last resort, if we don't have a build-id and couldn't find
+		 * any vmlinux file, try the running kernel kallsyms table.
+		 */
+		kallsyms_filename = "/proc/kallsyms";
 	}
 
+do_kallsyms:
+	err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
+	free(kallsyms_allocated_filename);
+
+out_try_fixup:
 	if (err > 0) {
 out_fixup:
+		if (kallsyms_filename == NULL)
+			dso__set_long_name(self, strdup("[kernel.kallsyms]"));
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}

From ef12a141306c90336a3a10d40213ecd98624d274 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 20 Jan 2010 15:28:45 -0200
Subject: [PATCH 090/640] perf buildid-cache: Add new command to manage
 build-id cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For now it just has operations to examine a given file, find its
build-id and add or remove it to/from the cache.

Useful, for instance, when adding binaries sent together with a
perf.data file, so that we can add them to the cache and have
the tools find it when resolving symbols.

It'll also manage the size of the cache like 'ccache' does.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264008525-29025-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 .../perf/Documentation/perf-buildid-cache.txt |  33 +++++
 tools/perf/Makefile                           |   1 +
 tools/perf/builtin-buildid-cache.c            | 133 ++++++++++++++++++
 tools/perf/builtin.h                          |   1 +
 tools/perf/command-list.txt                   |   1 +
 tools/perf/perf.c                             |   1 +
 tools/perf/util/header.c                      |  72 ++++++++--
 tools/perf/util/header.h                      |   5 +
 tools/perf/util/symbol.c                      |   4 +-
 tools/perf/util/symbol.h                      |   2 +-
 10 files changed, 241 insertions(+), 12 deletions(-)
 create mode 100644 tools/perf/Documentation/perf-buildid-cache.txt
 create mode 100644 tools/perf/builtin-buildid-cache.c

diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
new file mode 100644
index 000000000000..88bc3b519746
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,33 @@
+perf-buildid-cache(1)
+=====================
+
+NAME
+----
+perf-buildid-cache - Manage build-id cache.
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command manages the build-id cache. It can add and remove files to the
+cache. In the future it should as well purge older entries, set upper limits
+for the space used by the cache, etc.
+
+OPTIONS
+-------
+-a::
+--add=::
+        Add specified file to the cache.
+-r::
+--remove=::
+        Remove specified file to the cache.
+-v::
+--verbose::
+	Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index ddbeeee9ade2..9b173e66fb41 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -445,6 +445,7 @@ BUILTIN_OBJS += builtin-diff.o
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
 BUILTIN_OBJS += builtin-buildid-list.o
+BUILTIN_OBJS += builtin-buildid-cache.o
 BUILTIN_OBJS += builtin-list.o
 BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-report.o
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
new file mode 100644
index 000000000000..30a05f552c96
--- /dev/null
+++ b/tools/perf/builtin-buildid-cache.c
@@ -0,0 +1,133 @@
+/*
+ * builtin-buildid-cache.c
+ *
+ * Builtin buildid-cache command: Manages build-id cache
+ *
+ * Copyright (C) 2010, Red Hat Inc.
+ * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+#include "perf.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include "util/parse-options.h"
+#include "util/strlist.h"
+#include "util/symbol.h"
+
+static char const *add_name_list_str, *remove_name_list_str;
+
+static const char * const buildid_cache_usage[] = {
+	"perf buildid-cache [<options>]",
+	NULL
+};
+
+static const struct option buildid_cache_options[] = {
+	OPT_STRING('a', "add", &add_name_list_str,
+		   "file list", "file(s) to add"),
+	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
+		    "file(s) to remove"),
+	OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose"),
+	OPT_END()
+};
+
+static int build_id_cache__add_file(const char *filename, const char *debugdir)
+{
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	u8 build_id[BUILD_ID_SIZE];
+	int err;
+
+	if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+		pr_debug("Couldn't read a build-id in %s\n", filename);
+		return -1;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+	err = build_id_cache__add_s(sbuild_id, debugdir, filename, false);
+	if (verbose)
+		pr_info("Adding %s %s: %s\n", sbuild_id, filename,
+			err ? "FAIL" : "Ok");
+	return err;
+}
+
+static int build_id_cache__remove_file(const char *filename __used,
+				       const char *debugdir __used)
+{
+	u8 build_id[BUILD_ID_SIZE];
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+	int err;
+
+	if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+		pr_debug("Couldn't read a build-id in %s\n", filename);
+		return -1;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+	err = build_id_cache__remove_s(sbuild_id, debugdir);
+	if (verbose)
+		pr_info("Removing %s %s: %s\n", sbuild_id, filename,
+			err ? "FAIL" : "Ok");
+
+	return err;
+}
+
+static int __cmd_buildid_cache(void)
+{
+	struct strlist *list;
+	struct str_node *pos;
+	char debugdir[PATH_MAX];
+
+	snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
+		 DEBUG_CACHE_DIR);
+
+	if (add_name_list_str) {
+		list = strlist__new(true, add_name_list_str);
+		if (list) {
+			strlist__for_each(pos, list)
+				if (build_id_cache__add_file(pos->s, debugdir)) {
+					if (errno == EEXIST) {
+						pr_debug("%s already in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't add %s: %s\n",
+						   pos->s, strerror(errno));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (remove_name_list_str) {
+		list = strlist__new(true, remove_name_list_str);
+		if (list) {
+			strlist__for_each(pos, list)
+				if (build_id_cache__remove_file(pos->s, debugdir)) {
+					if (errno == ENOENT) {
+						pr_debug("%s wasn't in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't remove %s: %s\n",
+						   pos->s, strerror(errno));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	return 0;
+}
+
+int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used)
+{
+	argc = parse_options(argc, argv, buildid_cache_options,
+			     buildid_cache_usage, 0);
+
+	if (symbol__init() < 0)
+		return -1;
+
+	setup_pager();
+	return __cmd_buildid_cache();
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 18035b1f16c7..dee97cfe3794 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -16,6 +16,7 @@ extern int check_pager_config(const char *cmd);
 
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_bench(int argc, const char **argv, const char *prefix);
+extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
 extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
 extern int cmd_diff(int argc, const char **argv, const char *prefix);
 extern int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index cf6444dfd73a..9afcff2e3ae5 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -5,6 +5,7 @@
 perf-annotate			mainporcelain common
 perf-archive			mainporcelain common
 perf-bench			mainporcelain common
+perf-buildid-cache		mainporcelain common
 perf-buildid-list		mainporcelain common
 perf-diff			mainporcelain common
 perf-list			mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index fc89005c3e51..05c861c045d5 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -285,6 +285,7 @@ static void handle_internal_command(int argc, const char **argv)
 {
 	const char *cmd = argv[0];
 	static struct cmd_struct commands[] = {
+		{ "buildid-cache", cmd_buildid_cache, 0 },
 		{ "buildid-list", cmd_buildid_list, 0 },
 		{ "diff",	cmd_diff,	0 },
 		{ "help",	cmd_help,	0 },
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1b65fed0dd2d..2bb2bdb1f456 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -231,32 +231,29 @@ static int dsos__write_buildid_table(int fd)
 	return err;
 }
 
-static int dso__cache_build_id(struct dso *self, const char *debugdir)
+int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+			  const char *name, bool is_kallsyms)
 {
 	const size_t size = PATH_MAX;
 	char *filename = malloc(size),
-	     *linkname = malloc(size), *targetname, *sbuild_id;
+	     *linkname = malloc(size), *targetname;
 	int len, err = -1;
-	bool is_kallsyms = self->kernel && self->long_name[0] != '/';
 
 	if (filename == NULL || linkname == NULL)
 		goto out_free;
 
 	len = snprintf(filename, size, "%s%s%s",
-		       debugdir, is_kallsyms ? "/" : "", self->long_name);
+		       debugdir, is_kallsyms ? "/" : "", name);
 	if (mkdir_p(filename, 0755))
 		goto out_free;
 
-	len += snprintf(filename + len, sizeof(filename) - len, "/");
-	sbuild_id = filename + len;
-	build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
+	snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id);
 
 	if (access(filename, F_OK)) {
 		if (is_kallsyms) {
 			 if (copyfile("/proc/kallsyms", filename))
 				goto out_free;
-		} else if (link(self->long_name, filename) &&
-			   copyfile(self->long_name, filename))
+		} else if (link(name, filename) && copyfile(name, filename))
 			goto out_free;
 	}
 
@@ -278,6 +275,63 @@ out_free:
 	return err;
 }
 
+static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+				 const char *name, const char *debugdir,
+				 bool is_kallsyms)
+{
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+	build_id__sprintf(build_id, build_id_size, sbuild_id);
+
+	return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
+}
+
+int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
+{
+	const size_t size = PATH_MAX;
+	char *filename = malloc(size),
+	     *linkname = malloc(size);
+	int err = -1;
+
+	if (filename == NULL || linkname == NULL)
+		goto out_free;
+
+	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+		 debugdir, sbuild_id, sbuild_id + 2);
+
+	if (access(linkname, F_OK))
+		goto out_free;
+
+	if (readlink(linkname, filename, size) < 0)
+		goto out_free;
+
+	if (unlink(linkname))
+		goto out_free;
+
+	/*
+	 * Since the link is relative, we must make it absolute:
+	 */
+	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+		 debugdir, sbuild_id, filename);
+
+	if (unlink(linkname))
+		goto out_free;
+
+	err = 0;
+out_free:
+	free(filename);
+	free(linkname);
+	return err;
+}
+
+static int dso__cache_build_id(struct dso *self, const char *debugdir)
+{
+	bool is_kallsyms = self->kernel && self->long_name[0] != '/';
+
+	return build_id_cache__add_b(self->build_id, sizeof(self->build_id),
+				     self->long_name, debugdir, is_kallsyms);
+}
+
 static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
 {
 	struct dso *pos;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ccc8540feccd..82a6af72d4cc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -5,6 +5,7 @@
 #include <sys/types.h>
 #include <stdbool.h>
 #include "types.h"
+#include "event.h"
 
 #include <linux/bitmap.h>
 
@@ -84,4 +85,8 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 						 struct perf_header *ph,
 						 int feat, int fd));
 
+int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+			  const char *name, bool is_kallsyms);
+int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
+
 #endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b6ab23dd5f9f..6f30fe18c265 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -345,10 +345,10 @@ void dso__sort_by_name(struct dso *self, enum map_type type)
 				     &self->symbols[type]);
 }
 
-int build_id__sprintf(u8 *self, int len, char *bf)
+int build_id__sprintf(const u8 *self, int len, char *bf)
 {
 	char *bid = bf;
-	u8 *raw = self;
+	const u8 *raw = self;
 	int i;
 
 	for (i = 0; i < len; ++i) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 525085fd0735..ffe0b0f2e5d3 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -144,7 +144,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
 bool dsos__read_build_ids(void);
-int build_id__sprintf(u8 *self, int len, char *bf);
+int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start));

From 71318da9d2a46b9986dcecb44b7ae978753ca4dd Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vzapolskiy@gmail.com>
Date: Thu, 21 Jan 2010 12:12:52 +0300
Subject: [PATCH 091/640] ARM: MX3: Fixed typo in declared enum type name.

To distinguish between mx31lite and mx31lilly boards better to use
different enum types.

Signed-off-by: Vladimir Zapolskiy <vzapolskiy@gmail.com>
Acked-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/plat-mxc/include/mach/board-mx31lite.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/plat-mxc/include/mach/board-mx31lite.h b/arch/arm/plat-mxc/include/mach/board-mx31lite.h
index 0184b638c268..2b2da0367578 100644
--- a/arch/arm/plat-mxc/include/mach/board-mx31lite.h
+++ b/arch/arm/plat-mxc/include/mach/board-mx31lite.h
@@ -25,7 +25,7 @@
 
 #ifndef __ASSEMBLY__
 
-enum mx31lilly_boards {
+enum mx31lite_boards {
 	MX31LITE_NOBOARD	= 0,
 	MX31LITE_DB		= 1,
 };

From 0406ad336c066190770cbf350b552d608e43ed09 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 20 Jan 2010 00:06:30 -0700
Subject: [PATCH 092/640] ACPI: processor: add kernel command line support for
 early _PDC eval

Allow platforms not listed in DMI table
to opt-in and evaluate _PDC early.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/kernel-parameters.txt | 4 ++++
 drivers/acpi/processor_pdc.c        | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d45602886..826b6e148316 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -199,6 +199,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			acpi_display_output=video
 			See above.
 
+	acpi_early_pdc_eval	[HW,ACPI] Evaluate processor _PDC methods
+				early. Needed on some platforms to properly
+				initialize the EC.
+
 	acpi_irq_balance [HW,ACPI]
 			ACPI will balance active IRQs
 			default in APIC mode
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 7247819dbd80..3bbafe9576ae 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -151,6 +151,13 @@ static int set_early_pdc_optin(const struct dmi_system_id *id)
 	return 0;
 }
 
+static int param_early_pdc_optin(char *s)
+{
+	early_pdc_optin = 1;
+	return 1;
+}
+__setup("acpi_early_pdc_eval", param_early_pdc_optin);
+
 static struct dmi_system_id __cpuinitdata early_pdc_optin_table[] = {
 	{
 	set_early_pdc_optin, "HP Envy", {

From a4932299d03a1c20e58e4cc40a66fb0a048fb3a7 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 20 Jan 2010 00:06:35 -0700
Subject: [PATCH 093/640] ACPI: processor: only evaluate _PDC once per
 processor

If we evaluate _PDC in the early path, we do not want to evaluate
it again when the processor driver is loaded.

Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_pdc.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 3bbafe9576ae..e306ba9aa34e 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -125,6 +125,8 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
 	return status;
 }
 
+static int early_pdc_done;
+
 void acpi_processor_set_pdc(acpi_handle handle)
 {
 	struct acpi_object_list *obj_list;
@@ -132,6 +134,9 @@ void acpi_processor_set_pdc(acpi_handle handle)
 	if (arch_has_acpi_pdc() == false)
 		return;
 
+	if (early_pdc_done)
+		return;
+
 	obj_list = acpi_processor_alloc_pdc();
 	if (!obj_list)
 		return;
@@ -199,4 +204,6 @@ void __init acpi_early_processor_set_pdc(void)
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 			    ACPI_UINT32_MAX,
 			    early_init_pdc, NULL, NULL, NULL);
+
+	early_pdc_done = 1;
 }

From eceb784cec4dc0fcc2993d9ee4a7c0d111ada80a Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 25 Jan 2010 10:35:16 +0800
Subject: [PATCH 094/640] drm/i915: disable hotplug detect before Ironlake CRT
 detect

This tries to fix CRT detect loop hang seen on some Ironlake form
factor, to clear up hotplug detect state before taking CRT detect
to make sure next hotplug detect cycle is consistent.

Cc: Stable Team <stable@kernel.org>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_crt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index ddefc871edfe..79dd4026586f 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -157,6 +157,9 @@ static bool intel_ironlake_crt_detect_hotplug(struct drm_connector *connector)
 	adpa = I915_READ(PCH_ADPA);
 
 	adpa &= ~ADPA_CRT_HOTPLUG_MASK;
+	/* disable HPD first */
+	I915_WRITE(PCH_ADPA, adpa);
+	(void)I915_READ(PCH_ADPA);
 
 	adpa |= (ADPA_CRT_HOTPLUG_PERIOD_128 |
 			ADPA_CRT_HOTPLUG_WARMUP_10MS |

From 0ce907f89118aa8748f950700b6919b1d8d8a038 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 23 Jan 2010 20:26:35 +0000
Subject: [PATCH 095/640] drm/i915: Prevent use of uninitialized pointers along
 error path.

X.org hang with [drm:i915_gem_do_execbuffer] *ERROR* in dmesg
  http://bugzilla.kernel.org/show_bug.cgi?id=15114

Matej found he was hitting an error path within i915_gem_do_execbuffer()
that led to the attempt to dereference an uninitialised pointer during
cleanup. This path used to be safe as we used to calloc the object
lists, but this was changed in c8e0f93. Daniel Vetter had also spotted
this error and proposed a similar patch.

[ 6379.732892] [drm:i915_gem_do_execbuffer] *ERROR* Object ffff880098cd6540 appears more than once in object list
[ 6379.740976] [drm:i915_gem_do_execbuffer] *ERROR* Object ffff880098cd6540 appears more than once in object list
[ 6379.740995] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
[ 6379.740998] IP: [<ffffffff8122ddb5>] i915_gem_do_execbuffer+0xba5/0x1260
[ 6379.741006] PGD babab067 PUD bb435067 PMD 0
[ 6379.741010] Oops: 0002 [#1] PREEMPT SMP
[ 6379.741014] last sysfs file: /sys/devices/pci0000:00/0000:00:1c.2/0000:06:00.0/ieee80211/phy0/rfkill0/state
[ 6379.741017] CPU 1
[ 6379.741021] Pid: 2186, comm: X Not tainted 2.6.33-rc4-00399-g24bc734 #142 M11D/ESPRIMO Mobile M9400
[ 6379.741023] RIP: 0010:[<ffffffff8122ddb5>] [<ffffffff8122ddb5>] i915_gem_do_execbuffer+0xba5/0x1260
[ 6379.741027] RSP: 0018:ffff8800b9047b78  EFLAGS: 00213206
[ 6379.741029] RAX: 0000000000000000 RBX: 000000000000004f RCX: ffff880098cac800
[ 6379.741032] RDX: ffff880098caca78 RSI: ffff8800b9047c98 RDI: ffff880098cd6540
[ 6379.741034] RBP: ffff8800b9047c78 R08: ffffffff814b96b5 R09: 0000000000000006
[ 6379.741036] R10: 0000000000000000 R11: 0000000000000003 R12: 000000000000004e
[ 6379.741038] R13: 00000000fffffff7 R14: 0000000000000000 R15: 0000000000000001
[ 6379.741041] FS:  0000000000000000(0000) GS:ffff880001900000(0063) knlGS:00000000f72636c0
[ 6379.741043] CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
[ 6379.741041] FS:  0000000000000000(0000) GS:ffff880001900000(0063) knlGS:00000000f72636c0
[ 6379.741043] CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
[ 6379.741045] CR2: 00000000000000a0 CR3: 00000000b9000000 CR4: 00000000000006e0
[ 6379.741048] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 6379.741050] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 6379.741052] Process X (pid: 2186, threadinfo ffff8800b9046000, task ffff8800bb5d8000)
[ 6379.741054] Stack:
[ 6379.741055]  ffffc90023f57000 ffffc90023f56fff ffffc90023f56fff ffffc90023f55000
[ 6379.741059] <0> ffff8800b9047c98 ffff8800bb43c840 ffff8800bf1de800 ffff8800bf1de820
[ 6379.741063] <0> ffff8800b9047bd8 ffff880098cac800 0000000000000000 0000000000000002
[ 6379.741068] Call Trace:
[ 6379.741072]  [<ffffffff8122e6cb>] ?  i915_gem_execbuffer+0x6b/0x370
[ 6379.741077]  [<ffffffff810a5f52>] ? __vmalloc_node+0xa2/0xb0
[ 6379.741080]  [<ffffffff8122e6cb>] ?  i915_gem_execbuffer+0x6b/0x370
[ 6379.741083]  [<ffffffff8122e816>] i915_gem_execbuffer+0x1b6/0x370
[ 6379.741086]  [<ffffffff8120cd55>] drm_ioctl+0x1d5/0x460
[ 6379.741089]  [<ffffffff8122e660>] ?  i915_gem_execbuffer+0x0/0x370
[ 6379.741093]  [<ffffffff81248c35>] i915_compat_ioctl+0x45/0x50
[ 6379.741097]  [<ffffffff810f1659>] compat_sys_ioctl+0xa9/0x1570
[ 6379.741102]  [<ffffffff810b1d5c>] ? vfs_read+0x13c/0x1a0
[ 6379.741106]  [<ffffffff81028424>] sysenter_dispatch+0x7/0x2b
[ 6379.741108] Code: 08 85 c0 74 52 31 db 0f 1f 80 00 00 00 00 48 63 c3 48 8b
8d 68 ff ff ff 48 8d 14 c1 48 8b 02 48 85 c0 74 25 48 8b 80 80 00 00 00 <c7> 80
a0 00 00 00 00 00 00 00 48 8b 3a 48 85 ff 74 0c 48 c7 c6
[ 6379.741142] RIP  [<ffffffff8122ddb5>] i915_gem_do_execbuffer+0xba5/0x1260
[ 6379.741145]  RSP <ffff8800b9047b78>
[ 6379.741147] CR2: 00000000000000a0
[ 6379.741159] ---[ end trace 0598809afa4c31db ]---

Reported-by: Matej Laitl <strohel@gmail.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0c67924ca80c..1ef7ec4f38fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3742,6 +3742,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		if (object_list[i] == NULL) {
 			DRM_ERROR("Invalid object handle %d at index %d\n",
 				   exec_list[i].handle, i);
+			/* prevent error path from reading uninitialized data */
+			args->buffer_count = i + 1;
 			ret = -EBADF;
 			goto err;
 		}
@@ -3750,6 +3752,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		if (obj_priv->in_execbuffer) {
 			DRM_ERROR("Object %p appears more than once in object list\n",
 				   object_list[i]);
+			/* prevent error path from reading uninitialized data */
+			args->buffer_count = i + 1;
 			ret = -EBADF;
 			goto err;
 		}

From c062df61966405b0be6743bfb0cf300dca2fa7c2 Mon Sep 17 00:00:00 2001
From: Li Peng <peng.li@linux.intel.com>
Date: Sat, 23 Jan 2010 00:12:58 +0800
Subject: [PATCH 096/640] drm/i915: enable vblank interrupt on ironlake

so far vblank interrupt on ironlake is disabled, this would cause
bad gfx performance if userspace calls drm_wait_vblank. This patch
enables vblank interrupt on ironlake and follows vblank get/put
model.

Signed-off-by: Li Peng <peng.li@intel.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_irq.c      | 30 ++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c |  1 +
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 89a071a3e6fb..e7472d82132a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -309,6 +309,12 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 	if (de_iir & DE_GSE)
 		ironlake_opregion_gse_intr(dev);
 
+	if (de_iir & DE_PIPEA_VBLANK)
+		drm_handle_vblank(dev, 0);
+
+	if (de_iir & DE_PIPEB_VBLANK)
+		drm_handle_vblank(dev, 1);
+
 	/* check event from PCH */
 	if ((de_iir & DE_PCH_EVENT) &&
 	    (pch_iir & SDE_HOTPLUG_MASK)) {
@@ -844,11 +850,11 @@ int i915_enable_vblank(struct drm_device *dev, int pipe)
 	if (!(pipeconf & PIPEACONF_ENABLE))
 		return -EINVAL;
 
-	if (IS_IRONLAKE(dev))
-		return 0;
-
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
-	if (IS_I965G(dev))
+	if (IS_IRONLAKE(dev))
+		ironlake_enable_display_irq(dev_priv, (pipe == 0) ? 
+					    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+	else if (IS_I965G(dev))
 		i915_enable_pipestat(dev_priv, pipe,
 				     PIPE_START_VBLANK_INTERRUPT_ENABLE);
 	else
@@ -866,13 +872,14 @@ void i915_disable_vblank(struct drm_device *dev, int pipe)
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
 
-	if (IS_IRONLAKE(dev))
-		return;
-
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
-	i915_disable_pipestat(dev_priv, pipe,
-			      PIPE_VBLANK_INTERRUPT_ENABLE |
-			      PIPE_START_VBLANK_INTERRUPT_ENABLE);
+	if (IS_IRONLAKE(dev))
+		ironlake_disable_display_irq(dev_priv, (pipe == 0) ? 
+					     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+	else
+		i915_disable_pipestat(dev_priv, pipe,
+				      PIPE_VBLANK_INTERRUPT_ENABLE |
+				      PIPE_START_VBLANK_INTERRUPT_ENABLE);
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
 
@@ -1015,7 +1022,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT;
+	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
+			   DE_PIPEA_VBLANK | DE_PIPEB_VBLANK;
 	u32 render_mask = GT_USER_INTERRUPT;
 	u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
 			   SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 45da78ef4a92..2cc489b6629e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1638,6 +1638,7 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode)
 	case DRM_MODE_DPMS_OFF:
 		DRM_DEBUG_KMS("crtc %d dpms off\n", pipe);
 
+		drm_vblank_off(dev, pipe);
 		/* Disable display plane */
 		temp = I915_READ(dspcntr_reg);
 		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {

From 8a6c77d645d9111f7bc01f908464301f5c3e0a05 Mon Sep 17 00:00:00 2001
From: Li Peng <peng.li@linux.intel.com>
Date: Thu, 21 Jan 2010 18:45:46 +0800
Subject: [PATCH 097/640] drm/i915: Fix the device info of Pineview

Pineview doesn't has CXSR and need GTT-based hardware status page.
It fixes a X boot hung issue on Pinview since commit cfdf1f

Signed-off-by: Li Peng <peng.li@intel.com>
Acked-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 46d88965852a..ecac882e1d54 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -120,7 +120,7 @@ const static struct intel_device_info intel_gm45_info = {
 
 const static struct intel_device_info intel_pineview_info = {
 	.is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .is_i9xx = 1,
-	.has_pipe_cxsr = 1,
+	.need_gfx_hws = 1,
 	.has_hotplug = 1,
 };
 

From 1097df3ffe855eb1476496fa5394816fb197af05 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 20 Jan 2010 11:31:11 +0800
Subject: [PATCH 098/640] ocfs2: Sync max_inline_data_with_xattr from tools.

In ocfs2-tools, we have added ocfs2_max_inline_data_with_xattr,
so add it in the kernel's ocfs2_fs.h.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 1a1a679e51b5..7638a38c32bc 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
 	return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
 }
 
-static inline int ocfs2_max_inline_data(int blocksize)
+static inline int ocfs2_max_inline_data_with_xattr(int blocksize,
+						   struct ocfs2_dinode *di)
 {
-	return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data);
+	if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL))
+		return blocksize -
+			offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
+			di->i_xattr_inline_size;
+	else
+		return blocksize -
+			offsetof(struct ocfs2_dinode, id2.i_data.id_data);
 }
 
 static inline int ocfs2_extent_recs_per_inode(int blocksize)

From e5f2cb2b1ad05473fffe6970618997b906f23873 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Fri, 22 Jan 2010 21:58:04 +0800
Subject: [PATCH 099/640] ocfs2: fix a misleading variable name

a local variable "dlm_version" is used as a fs locking version.
rename it fs_version.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/stack_o2cb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index e49c41050264..3038c92af493 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
 	u32 dlm_key;
 	struct dlm_ctxt *dlm;
 	struct o2dlm_private *priv;
-	struct dlm_protocol_version dlm_version;
+	struct dlm_protocol_version fs_version;
 
 	BUG_ON(conn == NULL);
 	BUG_ON(o2cb_stack.sp_proto == NULL);
@@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
 	/* used by the dlm code to make message headers unique, each
 	 * node in this domain must agree on this. */
 	dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
-	dlm_version.pv_major = conn->cc_version.pv_major;
-	dlm_version.pv_minor = conn->cc_version.pv_minor;
+	fs_version.pv_major = conn->cc_version.pv_major;
+	fs_version.pv_minor = conn->cc_version.pv_minor;
 
-	dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version);
+	dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version);
 	if (IS_ERR(dlm)) {
 		rc = PTR_ERR(dlm);
 		mlog_errno(rc);
 		goto out_free;
 	}
 
-	conn->cc_version.pv_major = dlm_version.pv_major;
-	conn->cc_version.pv_minor = dlm_version.pv_minor;
+	conn->cc_version.pv_major = fs_version.pv_major;
+	conn->cc_version.pv_minor = fs_version.pv_minor;
 	conn->cc_lockspace = dlm;
 
 	dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);

From 2bd632165c1f783888bd4cbed95f2f304829159b Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 25 Jan 2010 16:57:38 -0800
Subject: [PATCH 100/640] ocfs2/trivial: Remove trailing whitespaces

Patch removes trailing whitespaces.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/aops.c                 |  4 ++--
 fs/ocfs2/buffer_head_io.c       |  2 +-
 fs/ocfs2/cluster/heartbeat.c    |  6 +++---
 fs/ocfs2/cluster/tcp.c          |  4 ++--
 fs/ocfs2/cluster/tcp_internal.h |  4 ++--
 fs/ocfs2/dlm/dlmapi.h           |  2 +-
 fs/ocfs2/dlm/dlmast.c           |  2 +-
 fs/ocfs2/dlm/dlmconvert.c       |  2 +-
 fs/ocfs2/dlm/dlmdomain.c        |  2 +-
 fs/ocfs2/dlm/dlmlock.c          |  2 +-
 fs/ocfs2/dlm/dlmmaster.c        | 38 ++++++++++++++++-----------------
 fs/ocfs2/dlm/dlmrecovery.c      | 38 ++++++++++++++++-----------------
 fs/ocfs2/dlm/dlmunlock.c        |  8 +++----
 fs/ocfs2/dlmglue.c              |  2 +-
 fs/ocfs2/export.c               |  2 +-
 fs/ocfs2/file.c                 | 14 ++++++------
 fs/ocfs2/inode.c                |  4 ++--
 fs/ocfs2/journal.c              |  2 +-
 fs/ocfs2/super.c                |  2 +-
 fs/ocfs2/uptodate.c             |  4 ++--
 20 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3dae4a13f6e4..7e9df11260f4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,7 +599,7 @@ bail:
 	return ret;
 }
 
-/* 
+/*
  * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
  * particularly interested in the aio/dio case.  Like the core uses
  * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
@@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 					    inode->i_sb->s_bdev, iov, offset,
-					    nr_segs, 
+					    nr_segs,
 					    ocfs2_direct_IO_get_blocks,
 					    ocfs2_dio_end_io);
 
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index d43d34a1dd31..21c808f752d8 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 	}
 	ocfs2_metadata_cache_io_unlock(ci);
 
-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
 	     (unsigned long long)block, nr,
 	     ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
 	     flags);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index eda5b8bcddd5..5c9890006708 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
 
 unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
 
-/* Only sets a new threshold if there are no active regions. 
+/* Only sets a new threshold if there are no active regions.
  *
  * No locking or otherwise interesting code is required for reading
  * o2hb_dead_threshold as it can't change once regions are active and
@@ -170,7 +170,7 @@ static void o2hb_write_timeout(struct work_struct *work)
 
 	mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
 	     "milliseconds\n", reg->hr_dev_name,
-	     jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); 
+	     jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
 	o2quo_disk_timeout();
 }
 
@@ -624,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
 	     "seq %llu last %llu changed %u equal %u\n",
 	     slot->ds_node_num, (long long)slot->ds_last_generation,
 	     le32_to_cpu(hb_block->hb_cksum),
-	     (unsigned long long)le64_to_cpu(hb_block->hb_seq), 
+	     (unsigned long long)le64_to_cpu(hb_block->hb_seq),
 	     (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
 	     slot->ds_equal_samples);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 334f231a422c..938ba181a3d9 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
 			cond_resched();
 			continue;
 		}
-		mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 
+		mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
 		     " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
 		o2net_ensure_shutdown(nn, sc, 0);
 		break;
@@ -1483,7 +1483,7 @@ static void o2net_idle_timer(unsigned long data)
 	mlog(ML_NOTICE, "here are some times that might help debug the "
 	     "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
 	     "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
-	     sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, 
+	     sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
 	     now.tv_sec, (long) now.tv_usec,
 	     sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
 	     sc->sc_tv_advance_start.tv_sec,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b1..96fa7ebc530c 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -32,10 +32,10 @@
  * on their number */
 #define O2NET_QUORUM_DELAY_MS	((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
 
-/* 
+/*
  * This version number represents quite a lot, unfortunately.  It not
  * only represents the raw network message protocol on the wire but also
- * locking semantics of the file system using the protocol.  It should 
+ * locking semantics of the file system using the protocol.  It should
  * be somewhere else, I'm sure, but right now it isn't.
  *
  * With version 11, we separate out the filesystem locking portion.  The
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index b5786a787fab..3cfa114aa391 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err);
 		mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st)));	\
 } while (0)
 
-#define DLM_LKSB_UNUSED1           0x01  
+#define DLM_LKSB_UNUSED1           0x01
 #define DLM_LKSB_PUT_LVB           0x02
 #define DLM_LKSB_GET_LVB           0x04
 #define DLM_LKSB_UNUSED2           0x08
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 01cf8cc3d286..dccc439fa087 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 		dlm_lock_put(lock);
 		/* free up the reserved bast that we are cancelling.
 		 * guaranteed that this will not be the last reserved
-		 * ast because *both* an ast and a bast were reserved 
+		 * ast because *both* an ast and a bast were reserved
 		 * to get to this point.  the res->spinlock will not be
 		 * taken here */
 		dlm_lockres_release_ast(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ca96bce50e18..f283bce776b4 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
 			/* instead of logging the same network error over
 			 * and over, sleep here and wait for the heartbeat
 			 * to notice the node is dead.  times out after 5s. */
-			dlm_wait_for_node_death(dlm, res->owner, 
+			dlm_wait_for_node_death(dlm, res->owner,
 						DLM_NODE_DEATH_WAIT_MAX);
 			ret = DLM_RECOVERING;
 			mlog(0, "node %u died so returning DLM_RECOVERING "
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 0334000676d3..988c9055fd4e 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 	}
 
 	/* Once the dlm ctxt is marked as leaving then we don't want
-	 * to be put in someone's domain map. 
+	 * to be put in someone's domain map.
 	 * Also, explicitly disallow joining at certain troublesome
 	 * times (ie. during recovery). */
 	if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 437698e9465f..733337772671 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 		}
 		dlm_revert_pending_lock(res, lock);
 		dlm_lock_put(lock);
-	} else if (dlm_is_recovery_lock(res->lockname.name, 
+	} else if (dlm_is_recovery_lock(res->lockname.name,
 					res->lockname.len)) {
 		/* special case for the $RECOVERY lock.
 		 * there will never be an AST delivered to put
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 03ccf9a7b1f4..a659606dcb95 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
 	struct dlm_master_list_entry *mle;
 
 	assert_spin_locked(&dlm->spinlock);
-	
+
 	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
 		if (node_up)
 			dlm_mle_node_up(dlm, mle, NULL, idx);
@@ -833,7 +833,7 @@ lookup:
 		__dlm_insert_mle(dlm, mle);
 
 		/* still holding the dlm spinlock, check the recovery map
-		 * to see if there are any nodes that still need to be 
+		 * to see if there are any nodes that still need to be
 		 * considered.  these will not appear in the mle nodemap
 		 * but they might own this lockres.  wait on them. */
 		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
@@ -883,7 +883,7 @@ redo_request:
 				msleep(500);
 			}
 			continue;
-		} 
+		}
 
 		dlm_kick_recovery_thread(dlm);
 		msleep(1000);
@@ -939,8 +939,8 @@ wait:
 		     res->lockname.name, blocked);
 		if (++tries > 20) {
 			mlog(ML_ERROR, "%s:%.*s: spinning on "
-			     "dlm_wait_for_lock_mastery, blocked=%d\n", 
-			     dlm->name, res->lockname.len, 
+			     "dlm_wait_for_lock_mastery, blocked=%d\n",
+			     dlm->name, res->lockname.len,
 			     res->lockname.name, blocked);
 			dlm_print_one_lock_resource(res);
 			dlm_print_one_mle(mle);
@@ -1029,7 +1029,7 @@ recheck:
 		ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
 		b = (mle->type == DLM_MLE_BLOCK);
 		if ((*blocked && !b) || (!*blocked && b)) {
-			mlog(0, "%s:%.*s: status change: old=%d new=%d\n", 
+			mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
 			     dlm->name, res->lockname.len, res->lockname.name,
 			     *blocked, b);
 			*blocked = b;
@@ -1602,7 +1602,7 @@ send_response:
 		}
 		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
 			     dlm->node_num, res->lockname.len, res->lockname.name);
-		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, 
+		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
 						 DLM_ASSERT_MASTER_MLE_CLEANUP);
 		if (ret < 0) {
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
@@ -1701,7 +1701,7 @@ again:
 
 		if (r & DLM_ASSERT_RESPONSE_REASSERT) {
 			mlog(0, "%.*s: node %u create mles on other "
-			     "nodes and requests a re-assert\n", 
+			     "nodes and requests a re-assert\n",
 			     namelen, lockname, to);
 			reassert = 1;
 		}
@@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
 				spin_unlock(&dlm->master_lock);
 				spin_unlock(&dlm->spinlock);
 				goto done;
-			}	
+			}
 		}
 	}
 	spin_unlock(&dlm->master_lock);
@@ -1883,7 +1883,7 @@ ok:
 		int extra_ref = 0;
 		int nn = -1;
 		int rr, err = 0;
-		
+
 		spin_lock(&mle->spinlock);
 		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
 			extra_ref = 1;
@@ -1891,7 +1891,7 @@ ok:
 			/* MASTER mle: if any bits set in the response map
 			 * then the calling node needs to re-assert to clear
 			 * up nodes that this node contacted */
-			while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, 
+			while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
 						    nn+1)) < O2NM_MAX_NODES) {
 				if (nn != dlm->node_num && nn != assert->node_idx)
 					master_request = 1;
@@ -2002,7 +2002,7 @@ kill:
 	__dlm_print_one_lock_resource(res);
 	spin_unlock(&res->spinlock);
 	spin_unlock(&dlm->spinlock);
-	*ret_data = (void *)res; 
+	*ret_data = (void *)res;
 	dlm_put(dlm);
 	return -EINVAL;
 }
@@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 	item->u.am.request_from = request_from;
 	item->u.am.flags = flags;
 
-	if (ignore_higher) 
-		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, 
+	if (ignore_higher)
+		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
 		     res->lockname.name);
-		
+
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
@@ -2133,7 +2133,7 @@ put:
  * think that $RECOVERY is currently mastered by a dead node.  If so,
  * we wait a short time to allow that node to get notified by its own
  * heartbeat stack, then check again.  All $RECOVERY lock resources
- * mastered by dead nodes are purged when the hearbeat callback is 
+ * mastered by dead nodes are purged when the hearbeat callback is
  * fired, so we can know for sure that it is safe to continue once
  * the node returns a live node or no node.  */
 static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
@@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
 				ret = -EAGAIN;
 			}
 			spin_unlock(&dlm->spinlock);
-			mlog(0, "%s: reco lock master is %u\n", dlm->name, 
+			mlog(0, "%s: reco lock master is %u\n", dlm->name,
 			     master);
 			break;
 		}
@@ -2602,7 +2602,7 @@ fail:
 
 			mlog(0, "%s:%.*s: timed out during migration\n",
 			     dlm->name, res->lockname.len, res->lockname.name);
-			/* avoid hang during shutdown when migrating lockres 
+			/* avoid hang during shutdown when migrating lockres
 			 * to a node which also goes down */
 			if (dlm_is_node_dead(dlm, target)) {
 				mlog(0, "%s:%.*s: expected migration "
@@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
 	can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
 	spin_unlock(&res->spinlock);
 
-	/* target has died, so make the caller break out of the 
+	/* target has died, so make the caller break out of the
 	 * wait_event, but caller must recheck the domain_map */
 	spin_lock(&dlm->spinlock);
 	if (!test_bit(mig_target, dlm->domain_map))
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 2f9e4e19a4f2..57736d3ea7b5 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
 				if (lock->ml.node == dead_node) {
 					mlog(0, "AHA! there was "
 					     "a $RECOVERY lock for dead "
-					     "node %u (%s)!\n", 
+					     "node %u (%s)!\n",
 					     dead_node, dlm->name);
 					list_del_init(&lock->list);
 					dlm_lock_put(lock);
@@ -1839,7 +1839,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 				 * the lvb. */
 				memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
 			} else {
-				/* otherwise, the node is sending its 
+				/* otherwise, the node is sending its
 				 * most recent valid lvb info */
 				BUG_ON(ml->type != LKM_EXMODE &&
 				       ml->type != LKM_PRMODE);
@@ -2114,7 +2114,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
 	assert_spin_locked(&res->spinlock);
 
 	if (res->owner == dlm->node_num)
-		/* if this node owned the lockres, and if the dead node 
+		/* if this node owned the lockres, and if the dead node
 		 * had an EX when he died, blank out the lvb */
 		search_node = dead_node;
 	else {
@@ -2152,7 +2152,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 
 	/* this node is the lockres master:
 	 * 1) remove any stale locks for the dead node
-	 * 2) if the dead node had an EX when he died, blank out the lvb 
+	 * 2) if the dead node had an EX when he died, blank out the lvb
 	 */
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
@@ -2260,7 +2260,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 				}
 				spin_unlock(&res->spinlock);
 				continue;
-			}			
+			}
 			spin_lock(&res->spinlock);
 			/* zero the lvb if necessary */
 			dlm_revalidate_lvb(dlm, res, dead_node);
@@ -2411,7 +2411,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
  * this function on each node racing to become the recovery
  * master will not stop attempting this until either:
  * a) this node gets the EX (and becomes the recovery master),
- * or b) dlm->reco.new_master gets set to some nodenum 
+ * or b) dlm->reco.new_master gets set to some nodenum
  * != O2NM_INVALID_NODE_NUM (another node will do the reco).
  * so each time a recovery master is needed, the entire cluster
  * will sync at this point.  if the new master dies, that will
@@ -2424,7 +2424,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
 
 	mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
 	     dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
-again:	
+again:
 	memset(&lksb, 0, sizeof(lksb));
 
 	ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
@@ -2437,8 +2437,8 @@ again:
 	if (ret == DLM_NORMAL) {
 		mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
 		     dlm->name, dlm->node_num);
-		
-		/* got the EX lock.  check to see if another node 
+
+		/* got the EX lock.  check to see if another node
 		 * just became the reco master */
 		if (dlm_reco_master_ready(dlm)) {
 			mlog(0, "%s: got reco EX lock, but %u will "
@@ -2451,12 +2451,12 @@ again:
 			/* see if recovery was already finished elsewhere */
 			spin_lock(&dlm->spinlock);
 			if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
-				status = -EINVAL;	
+				status = -EINVAL;
 				mlog(0, "%s: got reco EX lock, but "
 				     "node got recovered already\n", dlm->name);
 				if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
 					mlog(ML_ERROR, "%s: new master is %u "
-					     "but no dead node!\n", 
+					     "but no dead node!\n",
 					     dlm->name, dlm->reco.new_master);
 					BUG();
 				}
@@ -2468,7 +2468,7 @@ again:
 		 * set the master and send the messages to begin recovery */
 		if (!status) {
 			mlog(0, "%s: dead=%u, this=%u, sending "
-			     "begin_reco now\n", dlm->name, 
+			     "begin_reco now\n", dlm->name,
 			     dlm->reco.dead_node, dlm->node_num);
 			status = dlm_send_begin_reco_message(dlm,
 				      dlm->reco.dead_node);
@@ -2501,7 +2501,7 @@ again:
 		mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
 		     dlm->name, dlm->node_num);
 		/* another node is master. wait on
-		 * reco.new_master != O2NM_INVALID_NODE_NUM 
+		 * reco.new_master != O2NM_INVALID_NODE_NUM
 		 * for at most one second */
 		wait_event_timeout(dlm->dlm_reco_thread_wq,
 					 dlm_reco_master_ready(dlm),
@@ -2599,7 +2599,7 @@ retry:
 		}
 		if (ret < 0) {
 			struct dlm_lock_resource *res;
-			/* this is now a serious problem, possibly ENOMEM 
+			/* this is now a serious problem, possibly ENOMEM
 			 * in the network stack.  must retry */
 			mlog_errno(ret);
 			mlog(ML_ERROR, "begin reco of dlm %s to node %u "
@@ -2612,7 +2612,7 @@ retry:
 			} else {
 				mlog(ML_ERROR, "recovery lock not found\n");
 			}
-			/* sleep for a bit in hopes that we can avoid 
+			/* sleep for a bit in hopes that we can avoid
 			 * another ENOMEM */
 			msleep(100);
 			goto retry;
@@ -2664,7 +2664,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
 	}
 	if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
 		mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
-		     "node %u changing it to %u\n", dlm->name, 
+		     "node %u changing it to %u\n", dlm->name,
 		     dlm->reco.dead_node, br->node_idx, br->dead_node);
 	}
 	dlm_set_reco_master(dlm, br->node_idx);
@@ -2730,8 +2730,8 @@ stage2:
 		if (ret < 0) {
 			mlog_errno(ret);
 			if (dlm_is_host_down(ret)) {
-				/* this has no effect on this recovery 
-				 * session, so set the status to zero to 
+				/* this has no effect on this recovery
+				 * session, so set the status to zero to
 				 * finish out the last recovery */
 				mlog(ML_ERROR, "node %u went down after this "
 				     "node finished recovery.\n", nodenum);
@@ -2768,7 +2768,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
 	mlog(0, "%s: node %u finalizing recovery stage%d of "
 	     "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
 	     fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
- 
+
 	spin_lock(&dlm->spinlock);
 
 	if (dlm->reco.new_master != fr->node_idx) {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 00f53b2aea76..49e29ecd0201 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 			actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
 				     DLM_UNLOCK_REGRANT_LOCK|
 				     DLM_UNLOCK_CLEAR_CONVERT_TYPE);
-		} else if (status == DLM_RECOVERING || 
-			   status == DLM_MIGRATING || 
+		} else if (status == DLM_RECOVERING ||
+			   status == DLM_MIGRATING ||
 			   status == DLM_FORWARD) {
 			/* must clear the actions because this unlock
 			 * is about to be retried.  cannot free or do
@@ -661,14 +661,14 @@ retry:
 	if (call_ast) {
 		mlog(0, "calling unlockast(%p, %d)\n", data, status);
 		if (is_master) {
-			/* it is possible that there is one last bast 
+			/* it is possible that there is one last bast
 			 * pending.  make sure it is flushed, then
 			 * call the unlockast.
 			 * not an issue if this is a mastered remotely,
 			 * since this lock has been removed from the
 			 * lockres queues and cannot be found. */
 			dlm_kick_thread(dlm, NULL);
-			wait_event(dlm->ast_wq, 
+			wait_event(dlm->ast_wq,
 				   dlm_lock_basts_flushed(dlm, lock));
 		}
 		(*unlockast)(data, status);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c5e4a49e3a12..172f4c6ce1be 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3155,7 +3155,7 @@ out:
 /* Mark the lockres as being dropped. It will no longer be
  * queued if blocking, but we still may have to wait on it
  * being dequeued from the downconvert thread before we can consider
- * it safe to drop. 
+ * it safe to drop.
  *
  * You can *not* attempt to call cluster_lock on this lockres anymore. */
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 15713cbb865c..19ad145d2af3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 		mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
 		     (unsigned long long)blkno, generation);
 	}
-	
+
 	*max_len = len;
 
 bail:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 06ccf6a86d35..65e9375d2fb3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
 	int ret;
 
 	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-	/* ugh.  in prepare/commit_write, if from==to==start of block, we 
+	/* ugh.  in prepare/commit_write, if from==to==start of block, we
 	** skip the prepare.  make sure we never send an offset for the start
 	** of a block
 	*/
@@ -1779,7 +1779,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
 	struct inode *inode = dentry->d_inode;
 	loff_t saved_pos, end;
 
-	/* 
+	/*
 	 * We start with a read level meta lock and only jump to an ex
 	 * if we need to make modifications here.
 	 */
@@ -2033,7 +2033,7 @@ out_dio:
 						      pos + count - 1);
 	}
 
-	/* 
+	/*
 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 	 * function pointer which is called when o_direct io completes so that
 	 * it can unlock our rw lock.  (it's the clustered equivalent of
@@ -2198,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 		goto bail;
 	}
 
-	/* 
+	/*
 	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
 	 * need locks to protect pending reads from racing with truncate.
 	 */
@@ -2220,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	 * We're fine letting folks race truncates and extending
 	 * writes with read across the cluster, just like they can
 	 * locally. Hence no rw_lock during read.
-	 * 
+	 *
 	 * Take and drop the meta data lock to update inode fields
 	 * like i_size. This allows the checks down below
-	 * generic_file_aio_read() a chance of actually working. 
+	 * generic_file_aio_read() a chance of actually working.
 	 */
 	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
 	if (ret < 0) {
@@ -2248,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 bail:
 	if (have_alloc_sem)
 		up_read(&inode->i_alloc_sem);
-	if (rw_level != -1) 
+	if (rw_level != -1)
 		ocfs2_rw_unlock(inode, rw_level);
 	mlog_exit(ret);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0297fb8982b8..88459bdd1ff3 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
 		status = ocfs2_try_open_lock(inode, 0);
 		if (status) {
-			make_bad_inode(inode);	
+			make_bad_inode(inode);
 			return status;
 		}
 	}
@@ -684,7 +684,7 @@ bail:
 	return status;
 }
 
-/* 
+/*
  * Serialize with orphan dir recovery. If the process doing
  * recovery on this orphan dir does an iget() with the dir
  * i_mutex held, we'll deadlock here. Instead we detect this
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bf34c491ae96..9336c60e3a36 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 		status = -ENOENT;
 		mlog_errno(status);
 		return status;
-	}	
+	}
 
 	mutex_lock(&orphan_dir_inode->i_mutex);
 	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26069917a9f5..755cd49a5ef3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1062,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 				     "file system, but write access is "
 				     "unavailable.\n");
 			else
-				mlog_errno(status);			
+				mlog_errno(status);
 			goto read_super_error;
 		}
 
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index c61369342a27..a0a120e82b97 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
 }
 
 /* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. 
- * 
+ * the block is stored in our inode metadata cache.
+ *
  * This can be called under lock_buffer()
  */
 int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,

From 71656fa6ec10473eb9b646c10a2173fdea2f83c9 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 25 Jan 2010 16:57:39 -0800
Subject: [PATCH 101/640] ocfs2/dlm: Ignore LVBs of locks in the Blocked list

During lock resource migration, o2dlm fills the packet with a LVB from the
first valid lock. For sanity, it ensures that the other valid locks have the
same LVB. If not, it BUGs.

The valid locks are ones that have granted EX or PR lock levels and are either
on the Granted or Converting lists. Locks in the Blocked list cannot have a
valid LVB.

This patch ensures that we skip the locks in the Blocked list.

Fixes oss bugzilla#1202
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1202

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c | 48 +++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 57736d3ea7b5..9d67894cda6d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
 	mres->master = master;
 }
 
+static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
+					  struct dlm_migratable_lockres *mres,
+					  int queue)
+{
+	if (!lock->lksb)
+	       return;
+
+	/* Ignore lvb in all locks in the blocked list */
+	if (queue == DLM_BLOCKED_LIST)
+		return;
+
+	/* Only consider lvbs in locks with granted EX or PR lock levels */
+	if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
+		return;
+
+	if (dlm_lvb_is_empty(mres->lvb)) {
+		memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
+		return;
+	}
+
+	/* Ensure the lvb copied for migration matches in other valid locks */
+	if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
+		return;
+
+	mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
+	     "node=%u\n",
+	     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+	     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+	     lock->lockres->lockname.len, lock->lockres->lockname.name,
+	     lock->ml.node);
+	dlm_print_one_lock_resource(lock->lockres);
+	BUG();
+}
 
 /* returns 1 if this lock fills the network structure,
  * 0 otherwise */
@@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
 	ml->list = queue;
 	if (lock->lksb) {
 		ml->flags = lock->lksb->flags;
-		/* send our current lvb */
-		if (ml->type == LKM_EXMODE ||
-		    ml->type == LKM_PRMODE) {
-			/* if it is already set, this had better be a PR
-			 * and it has to match */
-			if (!dlm_lvb_is_empty(mres->lvb) &&
-			    (ml->type == LKM_EXMODE ||
-			     memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
-				mlog(ML_ERROR, "mismatched lvbs!\n");
-				dlm_print_one_lock_resource(lock->lockres);
-				BUG();
-			}
-			memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
-		}
+		dlm_prepare_lvb_for_migration(lock, mres, queue);
 	}
 	ml->node = lock->ml.node;
 	mres->num_locks++;

From 26636bf6b2010aa84c54d577231e017ba71493d0 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 25 Jan 2010 16:57:40 -0800
Subject: [PATCH 102/640] ocfs2/dlm: Print more messages during lock migration

When a lock resource is migrated, the dlm compares the migrated
locks with that that was already existing on the new node. If the
comparison fails, it BUGs. This patch prints more messages when the
comparison fails inorder to help with the root cause analyis.

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1206
This does not fix bz1206. However, if we run into it again, we will
have more information to chew on.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c | 46 +++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9d67894cda6d..cfb2ae9ab538 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1750,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 	struct dlm_lock *lock = NULL;
 	u8 from = O2NM_MAX_NODES;
 	unsigned int added = 0;
+	__be64 c;
 
 	mlog(0, "running %d locks for this lockres\n", mres->num_locks);
 	for (i=0; i<mres->num_locks; i++) {
@@ -1797,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
 			/* lock is always created locally first, and
 			 * destroyed locally last.  it must be on the list */
 			if (!lock) {
-				__be64 c = ml->cookie;
-				mlog(ML_ERROR, "could not find local lock "
-					       "with cookie %u:%llu!\n",
+				c = ml->cookie;
+				mlog(ML_ERROR, "Could not find local lock "
+					       "with cookie %u:%llu, node %u, "
+					       "list %u, flags 0x%x, type %d, "
+					       "conv %d, highest blocked %d\n",
 				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
-				     dlm_get_lock_cookie_seq(be64_to_cpu(c)));
+				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+				     ml->node, ml->list, ml->flags, ml->type,
+				     ml->convert_type, ml->highest_blocked);
+				__dlm_print_one_lock_resource(res);
+				BUG();
+			}
+
+			if (lock->ml.node != ml->node) {
+				c = lock->ml.cookie;
+				mlog(ML_ERROR, "Mismatched node# in lock "
+				     "cookie %u:%llu, name %.*s, node %u\n",
+				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
+				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+				     res->lockname.len, res->lockname.name,
+				     lock->ml.node);
+				c = ml->cookie;
+				mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
+				     "node %u, list %u, flags 0x%x, type %d, "
+				     "conv %d, highest blocked %d\n",
+				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
+				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+				     ml->node, ml->list, ml->flags, ml->type,
+				     ml->convert_type, ml->highest_blocked);
 				__dlm_print_one_lock_resource(res);
 				BUG();
 			}
-			BUG_ON(lock->ml.node != ml->node);
 
 			if (tmpq != queue) {
-				mlog(0, "lock was on %u instead of %u for %.*s\n",
-				     j, ml->list, res->lockname.len, res->lockname.name);
+				c = ml->cookie;
+				mlog(0, "Lock cookie %u:%llu was on list %u "
+				     "instead of list %u for %.*s\n",
+				     dlm_get_lock_cookie_node(be64_to_cpu(c)),
+				     dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+				     j, ml->list, res->lockname.len,
+				     res->lockname.name);
+				__dlm_print_one_lock_resource(res);
 				spin_unlock(&res->spinlock);
 				continue;
 			}
@@ -1906,7 +1936,7 @@ skip_lvb:
 		spin_lock(&res->spinlock);
 		list_for_each_entry(lock, queue, list) {
 			if (lock->ml.cookie == ml->cookie) {
-				__be64 c = lock->ml.cookie;
+				c = lock->ml.cookie;
 				mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
 				     "exists on this lockres!\n", dlm->name,
 				     res->lockname.len, res->lockname.name,

From de3f440f8c9922afe8770dd16c84f1c87d779b59 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Thu, 14 Jan 2010 13:18:02 -0800
Subject: [PATCH 103/640] drm/i915: handle non-flip pending case when unpinning
 the scanout buffer

The first page flip queued will replace the current front buffer, which
should have a 0 pending flip count.  So at finish time we need to handle
that case (i.e. if the flip count is 0 *or* dec_and_test is 0 we need to
wake the waiters).

Also fix up an error path in the queue function and add some debug
output (only enabled with driver debugging).

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2cc489b6629e..5f14dfbf715c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3985,6 +3985,12 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	work = intel_crtc->unpin_work;
 	if (work == NULL || !work->pending) {
+		if (work && !work->pending) {
+			obj_priv = work->obj->driver_private;
+			DRM_DEBUG_DRIVER("flip finish: %p (%d) not pending?\n",
+					 obj_priv,
+					 atomic_read(&obj_priv->pending_flip));
+		}
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		return;
 	}
@@ -4006,7 +4012,10 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe)
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	obj_priv = work->obj->driver_private;
-	if (atomic_dec_and_test(&obj_priv->pending_flip))
+
+	/* Initial scanout buffer will have a 0 pending flip count */
+	if ((atomic_read(&obj_priv->pending_flip) == 0) ||
+	    atomic_dec_and_test(&obj_priv->pending_flip))
 		DRM_WAKEUP(&dev_priv->pending_flip_queue);
 	schedule_work(&work->work);
 }
@@ -4019,8 +4028,11 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (intel_crtc->unpin_work)
+	if (intel_crtc->unpin_work) {
 		intel_crtc->unpin_work->pending = 1;
+	} else {
+		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
+	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
@@ -4054,6 +4066,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	/* We borrow the event spin lock for protecting unpin_work */
 	spin_lock_irqsave(&dev->event_lock, flags);
 	if (intel_crtc->unpin_work) {
+		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 		kfree(work);
 		mutex_unlock(&dev->struct_mutex);
@@ -4067,7 +4080,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	ret = intel_pin_and_fence_fb_obj(dev, obj);
 	if (ret != 0) {
+		DRM_DEBUG_DRIVER("flip queue: %p pin & fence failed\n",
+			  obj->driver_private);
 		kfree(work);
+		intel_crtc->unpin_work = NULL;
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}

From 9611a9b6f6de95c290efc697a3e1d0530878c047 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 22 Jan 2010 08:46:13 +0100
Subject: [PATCH 104/640] i.MX25: Allow secondary clocks in DEFINE_CLOCK

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c | 41 +++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index 6e838b857712..fbe42d258190 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -144,7 +144,7 @@ static void clk_cgcr_disable(struct clk *clk)
 	__raw_writel(reg, clk->enable_reg);
 }
 
-#define DEFINE_CLOCK(name, i, er, es, gr, sr)		\
+#define DEFINE_CLOCK(name, i, er, es, gr, sr, s)	\
 	static struct clk name = {			\
 		.id		= i,			\
 		.enable_reg	= CRM_BASE + er,	\
@@ -153,27 +153,28 @@ static void clk_cgcr_disable(struct clk *clk)
 		.set_rate	= sr,			\
 		.enable		= clk_cgcr_enable,	\
 		.disable	= clk_cgcr_disable,	\
+		.secondary	= s,			\
 	}
 
-DEFINE_CLOCK(gpt_clk,    0, CCM_CGCR0,  5, get_rate_ipg, NULL);
-DEFINE_CLOCK(cspi1_clk,  0, CCM_CGCR1,  5, get_rate_ipg, NULL);
-DEFINE_CLOCK(cspi2_clk,  0, CCM_CGCR1,  6, get_rate_ipg, NULL);
-DEFINE_CLOCK(cspi3_clk,  0, CCM_CGCR1,  7, get_rate_ipg, NULL);
-DEFINE_CLOCK(uart1_clk,  0, CCM_CGCR2, 14, get_rate_uart, NULL);
-DEFINE_CLOCK(uart2_clk,  0, CCM_CGCR2, 15, get_rate_uart, NULL);
-DEFINE_CLOCK(uart3_clk,  0, CCM_CGCR2, 16, get_rate_uart, NULL);
-DEFINE_CLOCK(uart4_clk,  0, CCM_CGCR2, 17, get_rate_uart, NULL);
-DEFINE_CLOCK(uart5_clk,  0, CCM_CGCR2, 18, get_rate_uart, NULL);
-DEFINE_CLOCK(nfc_clk,    0, CCM_CGCR0,  8, get_rate_nfc, NULL);
-DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL);
-DEFINE_CLOCK(pwm1_clk,	 0, CCM_CGCR1, 31, get_rate_ipg, NULL);
-DEFINE_CLOCK(pwm2_clk,	 0, CCM_CGCR2,  0, get_rate_ipg, NULL);
-DEFINE_CLOCK(pwm3_clk,	 0, CCM_CGCR2,  1, get_rate_ipg, NULL);
-DEFINE_CLOCK(pwm4_clk,	 0, CCM_CGCR2,  2, get_rate_ipg, NULL);
-DEFINE_CLOCK(kpp_clk,	 0, CCM_CGCR1, 28, get_rate_ipg, NULL);
-DEFINE_CLOCK(tsc_clk,	 0, CCM_CGCR2, 13, get_rate_ipg, NULL);
-DEFINE_CLOCK(i2c_clk,	 0, CCM_CGCR0,  6, get_rate_i2c, NULL);
-DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR0, 23, get_rate_ipg, NULL);
+DEFINE_CLOCK(gpt_clk,    0, CCM_CGCR0,  5, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(cspi1_clk,  0, CCM_CGCR1,  5, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(cspi2_clk,  0, CCM_CGCR1,  6, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(cspi3_clk,  0, CCM_CGCR1,  7, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(uart1_clk,  0, CCM_CGCR2, 14, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(uart2_clk,  0, CCM_CGCR2, 15, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(uart3_clk,  0, CCM_CGCR2, 16, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(uart4_clk,  0, CCM_CGCR2, 17, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(uart5_clk,  0, CCM_CGCR2, 18, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(nfc_clk,    0, CCM_CGCR0,  8, get_rate_nfc, NULL, NULL);
+DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL, NULL);
+DEFINE_CLOCK(pwm1_clk,	 0, CCM_CGCR1, 31, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(pwm2_clk,	 0, CCM_CGCR2,  0, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(pwm3_clk,	 0, CCM_CGCR2,  1, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(pwm4_clk,	 0, CCM_CGCR2,  2, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(kpp_clk,	 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(tsc_clk,	 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(i2c_clk,	 0, CCM_CGCR0,  6, get_rate_i2c, NULL, NULL);
+DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR0, 23, get_rate_ipg, NULL, NULL);
 
 #define _REGISTER_CLOCK(d, n, c)	\
 	{				\

From 4cd3f96cd4014419a4ea524d840be0fa39e3ddbc Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 22 Jan 2010 08:47:06 +0100
Subject: [PATCH 105/640] i.MX25: implement secondary clocks for uarts and fec

For uarts and fec need two clocks, implement it using the secondary clock
field in struct clk.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index fbe42d258190..3be51dd46c67 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -157,14 +157,16 @@ static void clk_cgcr_disable(struct clk *clk)
 	}
 
 DEFINE_CLOCK(gpt_clk,    0, CCM_CGCR0,  5, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL);
 DEFINE_CLOCK(cspi1_clk,  0, CCM_CGCR1,  5, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(cspi2_clk,  0, CCM_CGCR1,  6, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(cspi3_clk,  0, CCM_CGCR1,  7, get_rate_ipg, NULL, NULL);
-DEFINE_CLOCK(uart1_clk,  0, CCM_CGCR2, 14, get_rate_uart, NULL, NULL);
-DEFINE_CLOCK(uart2_clk,  0, CCM_CGCR2, 15, get_rate_uart, NULL, NULL);
-DEFINE_CLOCK(uart3_clk,  0, CCM_CGCR2, 16, get_rate_uart, NULL, NULL);
-DEFINE_CLOCK(uart4_clk,  0, CCM_CGCR2, 17, get_rate_uart, NULL, NULL);
-DEFINE_CLOCK(uart5_clk,  0, CCM_CGCR2, 18, get_rate_uart, NULL, NULL);
+DEFINE_CLOCK(fec_ipg_clk, 0, CCM_CGCR1, 15, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(uart1_clk,  0, CCM_CGCR2, 14, get_rate_uart, NULL, &uart_per_clk);
+DEFINE_CLOCK(uart2_clk,  0, CCM_CGCR2, 15, get_rate_uart, NULL, &uart_per_clk);
+DEFINE_CLOCK(uart3_clk,  0, CCM_CGCR2, 16, get_rate_uart, NULL, &uart_per_clk);
+DEFINE_CLOCK(uart4_clk,  0, CCM_CGCR2, 17, get_rate_uart, NULL, &uart_per_clk);
+DEFINE_CLOCK(uart5_clk,  0, CCM_CGCR2, 18, get_rate_uart, NULL, &uart_per_clk);
 DEFINE_CLOCK(nfc_clk,    0, CCM_CGCR0,  8, get_rate_nfc, NULL, NULL);
 DEFINE_CLOCK(usbotg_clk, 0, CCM_CGCR0, 28, get_rate_otg, NULL, NULL);
 DEFINE_CLOCK(pwm1_clk,	 0, CCM_CGCR1, 31, get_rate_ipg, NULL, NULL);
@@ -174,7 +176,7 @@ DEFINE_CLOCK(pwm4_clk,	 0, CCM_CGCR2,  2, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(kpp_clk,	 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(tsc_clk,	 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(i2c_clk,	 0, CCM_CGCR0,  6, get_rate_i2c, NULL, NULL);
-DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR0, 23, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR0, 23, get_rate_ipg, NULL, &fec_ipg_clk);
 
 #define _REGISTER_CLOCK(d, n, c)	\
 	{				\

From fadc095622dd188cae88eb2f3ff28fd6e9d2d2f1 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Mon, 25 Jan 2010 12:58:19 +0200
Subject: [PATCH 106/640] mx25: remove unused mx25_clocks_init() argument

The fref is needless on mx25 since the reference clock is fixed at 24MHz.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c              | 2 +-
 arch/arm/mach-mx25/mx25pdk.c            | 2 +-
 arch/arm/plat-mxc/include/mach/common.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index 3be51dd46c67..abd303bc3bf2 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -211,7 +211,7 @@ static struct clk_lookup lookups[] = {
 	_REGISTER_CLOCK("fec.0", NULL, fec_clk)
 };
 
-int __init mx25_clocks_init(unsigned long fref)
+int __init mx25_clocks_init(void)
 {
 	int i;
 
diff --git a/arch/arm/mach-mx25/mx25pdk.c b/arch/arm/mach-mx25/mx25pdk.c
index 921bc99ea231..6f06089246eb 100644
--- a/arch/arm/mach-mx25/mx25pdk.c
+++ b/arch/arm/mach-mx25/mx25pdk.c
@@ -91,7 +91,7 @@ static void __init mx25pdk_init(void)
 
 static void __init mx25pdk_timer_init(void)
 {
-	mx25_clocks_init(26000000);
+	mx25_clocks_init();
 }
 
 static struct sys_timer mx25pdk_timer = {
diff --git a/arch/arm/plat-mxc/include/mach/common.h b/arch/arm/plat-mxc/include/mach/common.h
index 286cb9b0a25b..4bf1068ffad9 100644
--- a/arch/arm/plat-mxc/include/mach/common.h
+++ b/arch/arm/plat-mxc/include/mach/common.h
@@ -32,7 +32,7 @@ extern void mxc91231_init_irq(void);
 extern void mxc_timer_init(struct clk *timer_clk, void __iomem *, int);
 extern int mx1_clocks_init(unsigned long fref);
 extern int mx21_clocks_init(unsigned long lref, unsigned long fref);
-extern int mx25_clocks_init(unsigned long fref);
+extern int mx25_clocks_init(void);
 extern int mx27_clocks_init(unsigned long fref);
 extern int mx31_clocks_init(unsigned long fref);
 extern int mx35_clocks_init(void);

From 828df43f139c7fbf0d505c7b9a666d321a0f2c25 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Mon, 25 Jan 2010 12:58:20 +0200
Subject: [PATCH 107/640] mx25: properly initialize clocks

This patch disables all unnecessary clock in mx25_clocks_init() to make a clean
start, the same as is being done for the rest of the i.MX chips.

This patch was tested on i.MX25 PDK.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index abd303bc3bf2..08aaa38f1f1f 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -218,6 +218,14 @@ int __init mx25_clocks_init(void)
 	for (i = 0; i < ARRAY_SIZE(lookups); i++)
 		clkdev_add(&lookups[i]);
 
+	/* Turn off all clocks except the ones we need to survive, namely:
+	 * EMI, GPIO1-3 (CCM_CGCR1[18:16]), GPT1, IOMUXC (CCM_CGCR1[27]), IIM,
+	 * SCC
+	 */
+	__raw_writel((1 << 19), CRM_BASE + CCM_CGCR0);
+	__raw_writel((0xf << 16) | (3 << 26), CRM_BASE + CCM_CGCR1);
+	__raw_writel((1 << 5), CRM_BASE + CCM_CGCR2);
+
 	mxc_timer_init(&gpt_clk, MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54);
 
 	return 0;

From faed40665d2d81b7e0e537d14ef680ab3da9f22d Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Mon, 25 Jan 2010 12:58:21 +0200
Subject: [PATCH 108/640] mx25: fix time accounting

The gpt_clk rate function doesn't consider the PER divider. This causes a
significant drift in time accounting. Fix this by introducing the correct rate
calculation function.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index 08aaa38f1f1f..c003ac40f9a8 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -119,6 +119,11 @@ static unsigned long get_rate_nfc(struct clk *clk)
 	return get_rate_per(8);
 }
 
+static unsigned long get_rate_gpt(struct clk *clk)
+{
+	return get_rate_per(5);
+}
+
 static unsigned long get_rate_otg(struct clk *clk)
 {
 	return 48000000; /* FIXME */
@@ -156,7 +161,7 @@ static void clk_cgcr_disable(struct clk *clk)
 		.secondary	= s,			\
 	}
 
-DEFINE_CLOCK(gpt_clk,    0, CCM_CGCR0,  5, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(gpt_clk,    0, CCM_CGCR0,  5, get_rate_gpt, NULL, NULL);
 DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL);
 DEFINE_CLOCK(cspi1_clk,  0, CCM_CGCR1,  5, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(cspi2_clk,  0, CCM_CGCR1,  6, get_rate_ipg, NULL, NULL);

From 1c5740237428ca025a30f53c5615edd11201c17b Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Mon, 25 Jan 2010 12:58:22 +0200
Subject: [PATCH 109/640] mx25: make the FEC AHB clk secondary of the IPG

This makes the FEC clock configuration consistent with the UART one.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx25/clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-mx25/clock.c b/arch/arm/mach-mx25/clock.c
index c003ac40f9a8..6acc88bcdc40 100644
--- a/arch/arm/mach-mx25/clock.c
+++ b/arch/arm/mach-mx25/clock.c
@@ -166,7 +166,7 @@ DEFINE_CLOCK(uart_per_clk, 0, CCM_CGCR0, 15, get_rate_uart, NULL, NULL);
 DEFINE_CLOCK(cspi1_clk,  0, CCM_CGCR1,  5, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(cspi2_clk,  0, CCM_CGCR1,  6, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(cspi3_clk,  0, CCM_CGCR1,  7, get_rate_ipg, NULL, NULL);
-DEFINE_CLOCK(fec_ipg_clk, 0, CCM_CGCR1, 15, get_rate_ipg, NULL, NULL);
+DEFINE_CLOCK(fec_ahb_clk, 0, CCM_CGCR0, 23, NULL,	 NULL, NULL);
 DEFINE_CLOCK(uart1_clk,  0, CCM_CGCR2, 14, get_rate_uart, NULL, &uart_per_clk);
 DEFINE_CLOCK(uart2_clk,  0, CCM_CGCR2, 15, get_rate_uart, NULL, &uart_per_clk);
 DEFINE_CLOCK(uart3_clk,  0, CCM_CGCR2, 16, get_rate_uart, NULL, &uart_per_clk);
@@ -181,7 +181,7 @@ DEFINE_CLOCK(pwm4_clk,	 0, CCM_CGCR2,  2, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(kpp_clk,	 0, CCM_CGCR1, 28, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(tsc_clk,	 0, CCM_CGCR2, 13, get_rate_ipg, NULL, NULL);
 DEFINE_CLOCK(i2c_clk,	 0, CCM_CGCR0,  6, get_rate_i2c, NULL, NULL);
-DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR0, 23, get_rate_ipg, NULL, &fec_ipg_clk);
+DEFINE_CLOCK(fec_clk,	 0, CCM_CGCR1, 15, get_rate_ipg, NULL, &fec_ahb_clk);
 
 #define _REGISTER_CLOCK(d, n, c)	\
 	{				\

From c9edda7140ec6a22accf7f2f86da362dfbfd41fc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:41:34 -0500
Subject: [PATCH 110/640] NFS: Fix a reference leak in nfs_wb_cancel_page()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/write.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696017f4..dac8d7676aff 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 			break;
 		}
 		ret = nfs_wait_on_request(req);
+		nfs_release_request(req);
 		if (ret < 0)
 			goto out;
 	}

From 82be934a59ff891cac598727e5a862ba2b9d1fac Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:41:53 -0500
Subject: [PATCH 111/640] NFS: Try to commit unstable writes in
 nfs_release_page()

If someone calls nfs_release_page(), we presumably already know that the
page is clean, however it may be holding an unstable write.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/file.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b891328f332..63f2071d6445 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 {
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
+	if (gfp & __GFP_WAIT)
+		nfs_wb_page(page->mapping->host, page);
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
 		return 0;

From 0aa05887af728b058af91197f0ae9b3ae63dd74a Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 26 Jan 2010 15:42:03 -0500
Subject: [PATCH 112/640] NFS: Make nfs_commitdata_release static

The symbol nfs_commitdata_release is only used locally
in this file. Make it static to prevent the following sparse warning:

warning: symbol 'nfs_commitdata_release' was not declared. Should it be static?

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dac8d7676aff..7b54b8bb101f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commitdata_release(void *data)
+static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 

From b0706ca415b188ed58788420de4d5c9972b2afb2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 26 Jan 2010 15:42:11 -0500
Subject: [PATCH 113/640] NFS: Avoid warnings when CONFIG_NFS_V4=n

Avoid the following warnings when CONFIG_NFS_V4=n:

	fs/nfs/sysctl.c:19: warning: unused variable `nfs_set_port_max'
	fs/nfs/sysctl.c:18: warning: unused variable `nfs_set_port_min'

by making those variables contingent on NFSv4 being configured.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/sysctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 70e1fbbaaeab..ad4d2e787b20 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -15,8 +15,10 @@
 
 #include "callback.h"
 
+#ifdef CONFIG_NFS_V4
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
+#endif
 static struct ctl_table_header *nfs_callback_sysctl_table;
 
 static ctl_table nfs_cb_sysctls[] = {

From 2bee72a6aa1e6d0a4f5da56217f0d0bbbdd0d9a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:42:21 -0500
Subject: [PATCH 114/640] NFSv4: Ensure that the NFSv4 locking can recover from
 stateid errors

In most cases, we just want to mark the lock_stateid sequence id as being
uninitialised.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4proc.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 198d51d17c13..0b68238ed0c8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4088,6 +4088,22 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
 	.rpc_release = nfs4_lock_release,
 };
 
+static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state *state = lsp->ls_state;
+
+	switch (error) {
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_BAD_STATEID:
+	case -NFS4ERR_EXPIRED:
+		if (new_lock_owner != 0 ||
+		   (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+			nfs4_state_mark_reclaim_nograce(clp, state);
+		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+	};
+}
+
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
 {
 	struct nfs4_lockdata *data;
@@ -4126,6 +4142,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 	ret = nfs4_wait_for_completion_rpc_task(task);
 	if (ret == 0) {
 		ret = data->rpc_status;
+		if (ret)
+			nfs4_handle_setlk_error(data->server, data->lsp,
+					data->arg.new_lock_owner, ret);
 	} else
 		data->cancelled = 1;
 	rpc_put_task(task);

From 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:42:30 -0500
Subject: [PATCH 115/640] NFSv4: Don't allow posix locking against servers that
 don't support it

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4_fs.h  | 1 +
 fs/nfs/nfs4proc.c | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 865265bdca03..ea2f41b26aea 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -146,6 +146,7 @@ enum {
 	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 	NFS_STATE_RECLAIM_REBOOT,	/* OPEN stateid server rebooted */
 	NFS_STATE_RECLAIM_NOGRACE,	/* OPEN stateid needs to recover state */
+	NFS_STATE_POSIX_LOCKS,		/* Posix locks are supported */
 };
 
 struct nfs4_state {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0b68238ed0c8..be044b58e811 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1658,6 +1658,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
 	status = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto err_opendata_put;
+	if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	*res = state;
@@ -4200,8 +4202,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	unsigned char fl_flags = request->fl_flags;
-	int status;
+	int status = -ENOLCK;
 
+	if ((fl_flags & FL_POSIX) &&
+			!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
+		goto out;
 	/* Is this a delegated open? */
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)

From 03391693a95900875b0973569d2d73ff3aa8972e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:42:38 -0500
Subject: [PATCH 116/640] NFSv4.1: Don't call nfs4_schedule_state_recovery()
 unnecessarily

Currently, nfs4_handle_exception() will call it twice if called with an
error of -NFS4ERR_STALE_CLIENTID, -NFS4ERR_STALE_STATEID or
-NFS4ERR_EXPIRED.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4proc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be044b58e811..afbfe673489b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,8 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			ret = nfs4_wait_clnt_recover(clp);
 			if (ret == 0)
 				exception->retry = 1;
-#if !defined(CONFIG_NFS_V4_1)
 			break;
-#else /* !defined(CONFIG_NFS_V4_1) */
-			if (!nfs4_has_session(server->nfs_client))
-				break;
-			/* FALLTHROUGH */
+#if defined(CONFIG_NFS_V4_1)
 		case -NFS4ERR_BADSESSION:
 		case -NFS4ERR_BADSLOT:
 		case -NFS4ERR_BAD_HIGH_SLOT:
@@ -274,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			nfs4_schedule_state_recovery(clp);
 			exception->retry = 1;
 			break;
-#endif /* !defined(CONFIG_NFS_V4_1) */
+#endif /* defined(CONFIG_NFS_V4_1) */
 		case -NFS4ERR_FILE_OPEN:
 			if (exception->timeout > HZ) {
 				/* We have retried a decent amount, time to

From a2c0b9e291208f65221a0ad8a0c80a377707d480 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 26 Jan 2010 15:42:47 -0500
Subject: [PATCH 117/640] NFS: Ensure that we handle NFS4ERR_STALE_STATEID
 correctly

Even if the server is crazy, we should be able to mark the stateid as being
bad, to ensure it gets recovered.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4_fs.h   |  1 +
 fs/nfs/nfs4proc.c  | 44 +++++++++++++++++++++++++++++++-------------
 fs/nfs/nfs4state.c |  2 +-
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ea2f41b26aea..0c6fda33d66e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -278,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
+extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index afbfe673489b..375f0fae2c6a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -249,14 +249,14 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			if (state == NULL)
 				break;
 			nfs4_state_mark_reclaim_nograce(clp, state);
-		case -NFS4ERR_STALE_CLIENTID:
+			goto do_state_recovery;
 		case -NFS4ERR_STALE_STATEID:
+			if (state == NULL)
+				break;
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(clp);
-			ret = nfs4_wait_clnt_recover(clp);
-			if (ret == 0)
-				exception->retry = 1;
-			break;
+			goto do_state_recovery;
 #if defined(CONFIG_NFS_V4_1)
 		case -NFS4ERR_BADSESSION:
 		case -NFS4ERR_BADSLOT:
@@ -289,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 	}
 	/* We failed to handle the error */
 	return nfs4_map_errors(ret);
+do_state_recovery:
+	nfs4_schedule_state_recovery(clp);
+	ret = nfs4_wait_clnt_recover(clp);
+	if (ret == 0)
+		exception->retry = 1;
+	return ret;
 }
 
 
@@ -3420,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 			if (state == NULL)
 				break;
 			nfs4_state_mark_reclaim_nograce(clp, state);
-		case -NFS4ERR_STALE_CLIENTID:
+			goto do_state_recovery;
 		case -NFS4ERR_STALE_STATEID:
+			if (state == NULL)
+				break;
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_EXPIRED:
-			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
-			nfs4_schedule_state_recovery(clp);
-			if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
-				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
-			task->tk_status = 0;
-			return -EAGAIN;
+			goto do_state_recovery;
 #if defined(CONFIG_NFS_V4_1)
 		case -NFS4ERR_BADSESSION:
 		case -NFS4ERR_BADSLOT:
@@ -3456,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 	}
 	task->tk_status = nfs4_map_errors(task->tk_status);
 	return 0;
+do_state_recovery:
+	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+	nfs4_schedule_state_recovery(clp);
+	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+		rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
+	task->tk_status = 0;
+	return -EAGAIN;
 }
 
 static int
@@ -4099,6 +4111,12 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_
 		   (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
 			nfs4_state_mark_reclaim_nograce(clp, state);
 		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+		break;
+	case -NFS4ERR_STALE_STATEID:
+		if (new_lock_owner != 0 ||
+		    (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+			nfs4_state_mark_reclaim_reboot(clp, state);
+		lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
 	};
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6d263ed79e92..c1e2733f4fa4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -901,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
 	nfs4_schedule_state_manager(clp);
 }
 
-static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
 {
 
 	set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);

From abd50713944c8ea9e0af5b7bffa0aacae21cc91a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Jan 2010 18:50:16 +0100
Subject: [PATCH 118/640] perf: Reimplement frequency driven sampling

There was a bug in the old period code that caused intel_pmu_enable_all()
or native_write_msr_safe() to show up quite high in the profiles.

In staring at that code it made my head hurt, so I rewrote it in a
hopefully simpler fashion. Its now fully symetric between tick and
overflow driven adjustments and uses less data to boot.

The only complication is that it basically wants to do a u128 division.
The code approximates that in a rather simple truncate until it fits
fashion, taking care to balance the terms while truncating.

This version does not generate that sampling artefact.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   5 +-
 kernel/perf_event.c        | 132 ++++++++++++++++++++++++++-----------
 2 files changed, 94 insertions(+), 43 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c6f812e4d058..72b2615600d8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -498,9 +498,8 @@ struct hw_perf_event {
 	atomic64_t			period_left;
 	u64				interrupts;
 
-	u64				freq_count;
-	u64				freq_interrupts;
-	u64				freq_stamp;
+	u64				freq_time_stamp;
+	u64				freq_count_stamp;
 #endif
 };
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index edc46b92b508..251fb9552492 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b) 		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
-
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*

From 24bfef0f924b4ac4312614422a4982b5f4d9a4c7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 21 Jan 2010 13:04:43 -0200
Subject: [PATCH 119/640] perf top: Fix sample counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broken since "5b2bb75 perf top: Support userspace symbols too".

Reported-by: Mike Galbraith <efault@gmx.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264086284-1431-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7a8a77ec2c9d..8b049888a9dd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -934,8 +934,11 @@ static void event__process_sample(const event_t *self,
 	struct addr_location al;
 	u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
+	++samples;
+
 	switch (origin) {
 	case PERF_RECORD_MISC_USER:
+		++userspace_samples;
 		if (hide_user_symbols)
 			return;
 		break;
@@ -960,9 +963,6 @@ static void event__process_sample(const event_t *self,
 		if (list_empty(&syme->node) || !syme->node.next)
 			__list_insert_active_sym(syme);
 		pthread_mutex_unlock(&active_symbols_lock);
-		if (origin == PERF_RECORD_MISC_USER)
-			++userspace_samples;
-		++samples;
 	}
 }
 

From 0f35cd4cea08a8893e3e2ea03cbdb65f5d2b0e7a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 21 Jan 2010 13:04:44 -0200
Subject: [PATCH 120/640] perf top: Handle PERF_RECORD_{FORK,EXIT} events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As noticed by Mike, symbols in new tasks were not being
processed as we weren't processing these events.

Reported-by: Mike Galbraith <efault@gmx.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264086284-1431-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8b049888a9dd..2227b84aa002 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -975,6 +975,10 @@ static int event__process(event_t *event, struct perf_session *session)
 	case PERF_RECORD_MMAP:
 		event__process_mmap(event, session);
 		break;
+	case PERF_RECORD_FORK:
+	case PERF_RECORD_EXIT:
+		event__process_task(event, session);
+		break;
 	default:
 		break;
 	}

From e1c7c6a40c8037478742ce134190c1a955853bfb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 Jan 2010 14:35:01 -0200
Subject: [PATCH 121/640] perf symbols: Fix inverted logic for showing kallsyms
 as the source of symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only if we parsed /proc/kallsyms (or a copy found in the buildid
cache) we should set the dso long name to "[kernel.kallsyms]".

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264178102-4203-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6f30fe18c265..1270cf867e61 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1671,7 +1671,7 @@ do_kallsyms:
 out_try_fixup:
 	if (err > 0) {
 out_fixup:
-		if (kallsyms_filename == NULL)
+		if (kallsyms_filename != NULL)
 			dso__set_long_name(self, strdup("[kernel.kallsyms]"));
 		map__fixup_start(map);
 		map__fixup_end(map);

From 19fc2dedff448120a7aeaa3c136689c6b71777c6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 Jan 2010 14:35:02 -0200
Subject: [PATCH 122/640] perf symbols: Use the right variable to check for
 kallsyms in the cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probably this wasn't noticed when testing this on my parisc
machine because I must have copied manually to its cache the
vmlinux file used in the x86_64 machine, now that I tried
looking on a x86-32 machine with a fresh cache, kernel symbols
weren't being resolved even with the right kallsyms copy on its
cache, duh.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264178102-4203-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1270cf867e61..f1f609dcf9a1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1650,12 +1650,12 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 			     getenv("HOME"), sbuild_id) == -1)
 			return -1;
 
+		kallsyms_filename = kallsyms_allocated_filename;
+
 		if (access(kallsyms_filename, F_OK)) {
 			free(kallsyms_allocated_filename);
 			return -1;
 		}
-
-		kallsyms_filename = kallsyms_allocated_filename;
 	} else {
 		/*
 		 * Last resort, if we don't have a build-id and couldn't find

From 408f0d18ba6b9bb447f807f621b2c9663c5cf638 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Fri, 22 Jan 2010 22:45:29 +0900
Subject: [PATCH 123/640] perf trace: Add -i option for choosing input file

perf trace lacks -i option for choosing input file.
This patch adds it to perf trace.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264167929-6741-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8e9cbfe608d6..0b65779e3c10 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -515,6 +515,8 @@ static const struct option options[] = {
 		     parse_scriptname),
 	OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
 		   "generate perf-trace.xx script in specified language"),
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
 
 	OPT_END()
 };

From 339ce1a4dc2ca26444c4f65c31b71a5056f3bb0b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 18 Jan 2010 16:47:07 +1100
Subject: [PATCH 124/640] perf: Fix inconsistency between IP and callchain
 sampling

When running perf across all cpus with backtracing (-a -g), sometimes we
get samples without associated backtraces:

    23.44%         init  [kernel]                     [k] restore
    11.46%         init                       eeba0c  [k] 0x00000000eeba0c
     6.77%      swapper  [kernel]                     [k] .perf_ctx_adjust_freq
     5.73%         init  [kernel]                     [k] .__trace_hcall_entry
     4.69%         perf  libc-2.9.so                  [.] 0x0000000006bb8c
                       |
                       |--11.11%-- 0xfffa941bbbc

It turns out the backtrace code has a check for the idle task and the IP
sampling does not. This creates problems when profiling an interrupt
heavy workload (in my case 10Gbit ethernet) since we get no backtraces
for interrupts received while idle (ie most of the workload).

Right now x86 and sh check that current is not NULL, which should never
happen so remove that too.

Idle task's exclusion must be performed from the core code, on top
of perf_event_attr:exclude_idle.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
LKML-Reference: <20100118054707.GT12666@kryten>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/powerpc/kernel/perf_callchain.c | 3 ---
 arch/sh/kernel/perf_callchain.c      | 3 ---
 arch/x86/kernel/cpu/perf_event.c     | 3 ---
 3 files changed, 9 deletions(-)

diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index a3c11cac3d71..95ad9dad298e 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -495,9 +495,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 
 	entry->nr = 0;
 
-	if (current->pid == 0)		/* idle task? */
-		return entry;
-
 	if (!user_mode(regs)) {
 		perf_callchain_kernel(regs, entry);
 		if (current->mm)
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 24ea837eac5b..a9dd3abde28e 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -68,9 +68,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
 	is_user = user_mode(regs);
 
-	if (!current || current->pid == 0)
-		return;
-
 	if (is_user && current->state != TASK_RUNNING)
 		return;
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b1bb8c550526..ed1998b28a7c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2425,9 +2425,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
 	is_user = user_mode(regs);
 
-	if (!current || current->pid == 0)
-		return;
-
 	if (is_user && current->state != TASK_RUNNING)
 		return;
 

From 430ad5a600a83956749307b13257c464c3826b55 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Thu, 28 Jan 2010 09:32:29 +0800
Subject: [PATCH 125/640] perf: Factorize trace events raw sample buffer
 operations

Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to
gather the common code that operates on raw events sampling buffer.
This cleans up redundant code between regular trace events, syscall
events and kprobe events.

Changelog v1->v2:
- Rename function name as per Masami and Frederic's suggestion
- Add __kprobes for ftrace_perf_buf_prepare() and make
  ftrace_perf_buf_submit() inline as per Masami's suggestion
- Export ftrace_perf_buf_prepare since modules will use it

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h       | 18 +++++--
 include/trace/ftrace.h             | 48 +++--------------
 kernel/trace/trace_event_profile.c | 52 ++++++++++++++++--
 kernel/trace/trace_kprobe.c        | 86 ++++--------------------------
 kernel/trace/trace_syscalls.c      | 71 ++++--------------------
 5 files changed, 88 insertions(+), 187 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 0a09e758c7d3..cd95919d9ff3 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -5,6 +5,7 @@
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 
 struct trace_array;
 struct tracer;
@@ -138,9 +139,6 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char *perf_trace_buf;
-extern char *perf_trace_buf_nmi;
-
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
@@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id);
 extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
+extern void *
+ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
+			 unsigned long *irq_flags);
+
+static inline void
+ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+		       u64 count, unsigned long irq_flags)
+{
+	struct trace_entry *entry = raw_data;
+
+	perf_tp_event(entry->type, addr, count, raw_data, size);
+	perf_swevent_put_recursion_context(rctx);
+	local_irq_restore(irq_flags);
+}
 #endif
 
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4a46a60c2077..f2c09e4d656c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	extern int perf_swevent_get_recursion_context(void);		\
-	extern void perf_swevent_put_recursion_context(int rctx);	\
-	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
-	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
-	char *trace_buf;						\
-	char *raw_data;							\
-	int __cpu;							\
 	int rctx;							\
-	int pc;								\
-									\
-	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
@@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
 		      "profile buffer not large enough"))		\
 		return;							\
-									\
-	local_irq_save(irq_flags);					\
-									\
-	rctx = perf_swevent_get_recursion_context();			\
-	if (rctx < 0)							\
-		goto end_recursion;					\
-									\
-	__cpu = smp_processor_id();					\
-									\
-	if (in_nmi())							\
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);	\
-	else								\
-		trace_buf = rcu_dereference(perf_trace_buf);		\
-									\
-	if (!trace_buf)							\
-		goto end;						\
-									\
-	raw_data = per_cpu_ptr(trace_buf, __cpu);			\
-									\
-	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
-	entry = (struct ftrace_raw_##call *)raw_data;			\
-	ent = &entry->ent;						\
-	tracing_generic_entry_update(ent, irq_flags, pc);		\
-	ent->type = event_call->id;					\
-									\
+	entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare(	\
+		__entry_size, event_call->id, &rctx, &irq_flags);	\
+	if (!entry)							\
+		return;							\
 	tstruct								\
 									\
 	{ assign; }							\
 									\
-	perf_tp_event(event_call->id, __addr, __count, entry,		\
-			     __entry_size);				\
-									\
-end:									\
-	perf_swevent_put_recursion_context(rctx);			\
-end_recursion:								\
-	local_irq_restore(irq_flags);					\
+	ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr,	\
+			       __count, irq_flags);			\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
  */
 
 #include <linux/module.h>
+#include <linux/kprobes.h>
 #include "trace.h"
 
 
-char *perf_trace_buf;
-EXPORT_SYMBOL_GPL(perf_trace_buf);
-
-char *perf_trace_buf_nmi;
-EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
 
 typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
 
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
 	}
 	mutex_unlock(&event_mutex);
 }
+
+__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
+					int *rctxp, unsigned long *irq_flags)
+{
+	struct trace_entry *entry;
+	char *trace_buf, *raw_data;
+	int pc, cpu;
+
+	pc = preempt_count();
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(*irq_flags);
+
+	*rctxp = perf_swevent_get_recursion_context();
+	if (*rctxp < 0)
+		goto err_recursion;
+
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference(perf_trace_buf);
+
+	if (!trace_buf)
+		goto err;
+
+	raw_data = per_cpu_ptr(trace_buf, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	entry = (struct trace_entry *)raw_data;
+	tracing_generic_entry_update(entry, *irq_flags, pc);
+	entry->type = type;
+
+	return raw_data;
+err:
+	perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+	local_irq_restore(*irq_flags);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d6266cad6953..2e28ee36646f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
-	struct trace_entry *ent;
-	int size, __size, i, pc, __cpu;
+	int size, __size, i;
 	unsigned long irq_flags;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 
-	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 		     "profile buffer not large enough"))
 		return 0;
 
-	/*
-	 * Protect the non nmi buffer
-	 * This also protects the rcu read side
-	 */
-	local_irq_save(irq_flags);
+	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	if (!entry)
+		return 0;
 
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	__cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, __cpu);
-
-	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-	entry = (struct kprobe_trace_entry *)raw_data;
-	ent = &entry->ent;
-
-	tracing_generic_entry_update(ent, irq_flags, pc);
-	ent->type = call->id;
 	entry->nargs = tp->nr_args;
 	entry->ip = (unsigned long)kp->addr;
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
 
 	return 0;
 }
@@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
-	struct trace_entry *ent;
-	int size, __size, i, pc, __cpu;
+	int size, __size, i;
 	unsigned long irq_flags;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 
-	pc = preempt_count();
 	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 		     "profile buffer not large enough"))
 		return 0;
 
-	/*
-	 * Protect the non nmi buffer
-	 * This also protects the rcu read side
-	 */
-	local_irq_save(irq_flags);
+	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	if (!entry)
+		return 0;
 
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	__cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, __cpu);
-
-	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-	entry = (struct kretprobe_trace_entry *)raw_data;
-	ent = &entry->ent;
-
-	tracing_generic_entry_update(ent, irq_flags, pc);
-	ent->type = call->id;
 	entry->nargs = tp->nr_args;
 	entry->func = (unsigned long)tp->rp.kp.addr;
 	entry->ret_ip = (unsigned long)ri->ret_addr;
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
 
 	return 0;
 }
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f694f66d75b0..4e332b9e449c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
-	char *trace_buf;
-	char *raw_data;
 	int syscall_nr;
 	int rctx;
 	int size;
-	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 		      "profile buffer not large enough"))
 		return;
 
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(flags);
+	rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+				sys_data->enter_event->id, &rctx, &flags);
+	if (!rec)
+		return;
 
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	cpu = smp_processor_id();
-
-	trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, cpu);
-
-	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
-	rec = (struct syscall_trace_enter *) raw_data;
-	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->enter_event->id;
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
-
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
 }
 
 int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	struct syscall_trace_exit *rec;
 	unsigned long flags;
 	int syscall_nr;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 	int size;
-	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 		"exit event has grown above profile buffer size"))
 		return;
 
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(flags);
+	rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+				sys_data->exit_event->id, &rctx, &flags);
+	if (!rec)
+		return;
 
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	cpu = smp_processor_id();
-
-	trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, cpu);
-
-	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
-	rec = (struct syscall_trace_exit *)raw_data;
-
-	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->exit_event->id;
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
-
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
 }
 
 int prof_sysexit_enable(struct ftrace_event_call *call)

From 1e12a4a7a3a78bc9c3aaf3486dde3b8ab1cdf465 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Thu, 28 Jan 2010 09:34:27 +0800
Subject: [PATCH 126/640] tracing/kprobe: Cleanup unused return value of
 tracing functions

The return values of the kprobe's tracing functions are meaningless,
lets remove these.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B60E9A3.2040505@cn.fujitsu.com>
[fweisbec@gmail: whitespace fixes, drop useless void returns in end
of functions]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/trace/trace_kprobe.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2e28ee36646f..6178abf3637e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -942,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
 };
 
 /* Kprobe handler */
-static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
+static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct kprobe_trace_entry *entry;
@@ -962,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 	event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
 						  irq_flags, pc);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
 	entry->nargs = tp->nr_args;
@@ -972,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
 		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
-	return 0;
 }
 
 /* Kretprobe handler */
-static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 					  struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -995,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
 	event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
 						  irq_flags, pc);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
 	entry->nargs = tp->nr_args;
@@ -1006,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
 		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
-
-	return 0;
 }
 
 /* Event entry printers */
@@ -1237,7 +1234,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes int kprobe_profile_func(struct kprobe *kp,
+static __kprobes void kprobe_profile_func(struct kprobe *kp,
 					 struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1252,11 +1249,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
 		     "profile buffer not large enough"))
-		return 0;
+		return;
 
 	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
 	if (!entry)
-		return 0;
+		return;
 
 	entry->nargs = tp->nr_args;
 	entry->ip = (unsigned long)kp->addr;
@@ -1264,12 +1261,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
 	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
-
-	return 0;
 }
 
 /* Kretprobe profile handler */
-static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
 					    struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1284,11 +1279,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
 		     "profile buffer not large enough"))
-		return 0;
+		return;
 
 	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
 	if (!entry)
-		return 0;
+		return;
 
 	entry->nargs = tp->nr_args;
 	entry->func = (unsigned long)tp->rp.kp.addr;
@@ -1297,8 +1292,6 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
 	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
-
-	return 0;
 }
 
 static int probe_profile_enable(struct ftrace_event_call *call)

From 40f9249a73f6c251adea492b1c3d19d39e2a9bda Mon Sep 17 00:00:00 2001
From: "K.Prasad" <prasad@linux.vnet.ibm.com>
Date: Thu, 28 Jan 2010 16:44:01 +0530
Subject: [PATCH 127/640] x86/debug: Clear reserved bits of DR6 in do_debug()

Clear the reserved bits from the stored copy of debug status
register (DR6).
This will help easy bitwise operations such as quick testing
of a debug event origin.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <20100128111401.GB13935@in.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/include/asm/debugreg.h | 3 +++
 arch/x86/kernel/traps.c         | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
    which debugging register was responsible for the trap.  The other bits
    are either reserved or not of interest to us. */
 
+/* Define reserved bits in DR6 which are always set to 1 */
+#define DR6_RESERVED	(0xFFFF0FF0)
+
 #define DR_TRAP0	(0x1)		/* db0 */
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 33399176512a..1168e4454188 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	get_debugreg(dr6, 6);
 
+	/* Filter out all the reserved bits which are preset to 1 */
+	dr6 &= ~DR6_RESERVED;
+
 	/* Catch kmemcheck conditions first of all! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		return;

From e0e53db6133c32964fd17f20b17073a402f07ed3 Mon Sep 17 00:00:00 2001
From: "K.Prasad" <prasad@linux.vnet.ibm.com>
Date: Thu, 28 Jan 2010 16:44:15 +0530
Subject: [PATCH 128/640] x86/hw-breakpoints: Optimize return code from
 notifier chain in hw_breakpoint_handler

Processing of debug exceptions in do_debug() can stop if it
originated from a hw-breakpoint exception by returning NOTIFY_STOP
in most cases.

But for certain cases such as:

a) user-space breakpoints with pending SIGTRAP signal delivery (as
in the case of ptrace induced breakpoints).

b) exceptions due to other causes than breakpoints

We will continue to process the exception by returning NOTIFY_DONE.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
LKML-Reference: <20100128111415.GC13935@in.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/kernel/hw_breakpoint.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 05d5fec64a94..ae90b4739435 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -502,8 +502,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		rcu_read_lock();
 
 		bp = per_cpu(bp_per_reg[i], cpu);
-		if (bp)
-			rc = NOTIFY_DONE;
 		/*
 		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
 		 * exception handling
@@ -522,7 +520,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 
 		rcu_read_unlock();
 	}
-	if (dr6 & (~DR_TRAP_BITS))
+	/*
+	 * Further processing in do_debug() is needed for a) user-space
+	 * breakpoints (to generate signals) and b) when the system has
+	 * taken exception due to multiple causes
+	 */
+	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
+	    (dr6 & (~DR_TRAP_BITS)))
 		rc = NOTIFY_DONE;
 
 	set_debugreg(dr7, 7);

From 1da53e023029c067ba1277a33038c65d6e4c99b3 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 18 Jan 2010 10:58:01 +0200
Subject: [PATCH 129/640] perf_events, x86: Improve x86 event scheduling

This patch improves event scheduling by maximizing the use of PMU
registers regardless of the order in which events are created in a group.

The algorithm takes into account the list of counter constraints for each
event. It assigns events to counters from the most constrained, i.e.,
works on only one counter, to the least constrained, i.e., works on any
counter.

Intel Fixed counter events and the BTS special event are also handled via
this algorithm which is designed to be fairly generic.

The patch also updates the validation of an event to use the scheduling
algorithm. This will cause early failure in perf_event_open().

The 2nd version of this patch follows the model used by PPC, by running
the scheduling algorithm and the actual assignment separately. Actual
assignment takes place in hw_perf_enable() whereas scheduling is
implemented in hw_perf_group_sched_in() and x86_pmu_enable().

Signed-off-by: Stephane Eranian <eranian@google.com>
[ fixup whitespace and style nits as well as adding is_x86_event() ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h |  16 +-
 arch/x86/kernel/cpu/perf_event.c  | 771 +++++++++++++++++++++---------
 2 files changed, 572 insertions(+), 215 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8d9f8548a870..dbc082685d52 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -26,7 +26,14 @@
 /*
  * Includes eventsel and unit mask as well:
  */
-#define ARCH_PERFMON_EVENT_MASK				    0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
+#define INTEL_ARCH_INV_MASK		0x00800000ULL
+#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
 
 /*
  * filter mask to validate fixed counter events.
@@ -37,7 +44,12 @@
  *  The other filters are supported by fixed counters.
  *  The any-thread option is supported starting with v3.
  */
-#define ARCH_PERFMON_EVENT_FILTER_MASK			0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+	(INTEL_ARCH_CNT_MASK| \
+	 INTEL_ARCH_INV_MASK| \
+	 INTEL_ARCH_EDGE_MASK|\
+	 INTEL_ARCH_UNIT_MASK|\
+	 INTEL_ARCH_EVENT_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed1998b28a7c..995ac4ae379c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -68,26 +69,37 @@ struct debug_store {
 	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+
+struct event_constraint {
+	u64	idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)];
+	int	code;
+	int	cmask;
+};
+
 struct cpu_hw_events {
-	struct perf_event	*events[X86_PMC_IDX_MAX];
-	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
 	struct debug_store	*ds;
+
+	int			n_events;
+	int			n_added;
+	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-struct event_constraint {
-	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int		code;
-};
+#define EVENT_CONSTRAINT(c, n, m) { \
+	.code = (c),	\
+	.cmask = (m),	\
+	.idxmsk[0] = (n) }
 
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+#define EVENT_CONSTRAINT_END \
+	{ .code = 0, .cmask = 0, .idxmsk[0] = 0 }
 
 #define for_each_event_constraint(e, c) \
-	for ((e) = (c); (e)->idxmsk[0]; (e)++)
-
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
@@ -114,8 +126,9 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
+	void		(*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk);
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event);
+	const struct event_constraint *event_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -124,7 +137,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx);
 
 /*
  * Not sure about some of these
@@ -171,14 +185,14 @@ static u64 p6_pmu_raw_event(u64 hw_event)
 	return hw_event & P6_EVNTSEL_MASK;
 }
 
-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK),	/* FLOPS */
+	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK),	/* CYCLES_DIV_BUSY */
 	EVENT_CONSTRAINT_END
 };
 
@@ -196,32 +210,43 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static const struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
-	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
-	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
-	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */
+	EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */
+	EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */
+	EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */
 	EVENT_CONSTRAINT_END
 };
 
-static const struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
-	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
-	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
-	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
-	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
-	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
-	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
-	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
+	EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */
+	EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */
+	EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */
+	EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */
+	EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */
+	EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */
+	EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
@@ -527,11 +552,11 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 #define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK		\
-	(CORE_EVNTSEL_EVENT_MASK |	\
-	 CORE_EVNTSEL_UNIT_MASK  |	\
-	 CORE_EVNTSEL_EDGE_MASK  |	\
-	 CORE_EVNTSEL_INV_MASK  |	\
-	 CORE_EVNTSEL_REG_MASK)
+	(INTEL_ARCH_EVTSEL_MASK |	\
+	 INTEL_ARCH_UNIT_MASK   |	\
+	 INTEL_ARCH_EDGE_MASK   |	\
+	 INTEL_ARCH_INV_MASK    |	\
+	 INTEL_ARCH_CNT_MASK)
 
 	return hw_event & CORE_EVNTSEL_MASK;
 }
@@ -1120,9 +1145,15 @@ static void amd_pmu_disable_all(void)
 
 void hw_perf_disable(void)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
 	if (!x86_pmu_initialized())
 		return;
-	return x86_pmu.disable_all();
+
+	if (cpuc->enabled)
+		cpuc->n_added = 0;
+
+	x86_pmu.disable_all();
 }
 
 static void p6_pmu_enable_all(void)
@@ -1189,10 +1220,237 @@ static void amd_pmu_enable_all(void)
 	}
 }
 
+static const struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+	return event->pmu == &pmu;
+}
+
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	int i, j , w, num;
+	int weight, wmax;
+	unsigned long *c;
+	u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct hw_perf_event *hwc;
+
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+	for (i = 0; i < n; i++) {
+		x86_pmu.get_event_constraints(cpuc,
+					      cpuc->event_list[i],
+					      constraints[i]);
+	}
+
+	/*
+	 * weight = number of possible counters
+	 *
+	 * 1    = most constrained, only works on one counter
+	 * wmax = least constrained, works on any counter
+	 *
+	 * assign events to counters starting with most
+	 * constrained events.
+	 */
+	wmax = x86_pmu.num_events;
+
+	/*
+	 * when fixed event counters are present,
+	 * wmax is incremented by 1 to account
+	 * for one more choice
+	 */
+	if (x86_pmu.num_events_fixed)
+		wmax++;
+
+	num = n;
+	for (w = 1; num && w <= wmax; w++) {
+		/* for each event */
+		for (i = 0; i < n; i++) {
+			c = (unsigned long *)constraints[i];
+			hwc = &cpuc->event_list[i]->hw;
+
+			weight = bitmap_weight(c, X86_PMC_IDX_MAX);
+			if (weight != w)
+				continue;
+
+			/*
+			 * try to reuse previous assignment
+			 *
+			 * This is possible despite the fact that
+			 * events or events order may have changed.
+			 *
+			 * What matters is the level of constraints
+			 * of an event and this is constant for now.
+			 *
+			 * This is possible also because we always
+			 * scan from most to least constrained. Thus,
+			 * if a counter can be reused, it means no,
+			 * more constrained events, needed it. And
+			 * next events will either compete for it
+			 * (which cannot be solved anyway) or they
+			 * have fewer constraints, and they can use
+			 * another counter.
+			 */
+			j = hwc->idx;
+			if (j != -1 && !test_bit(j, used_mask))
+				goto skip;
+
+			for_each_bit(j, c, X86_PMC_IDX_MAX) {
+				if (!test_bit(j, used_mask))
+					break;
+			}
+
+			if (j == X86_PMC_IDX_MAX)
+				break;
+skip:
+			set_bit(j, used_mask);
+
+#if 0
+			pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+				smp_processor_id(),
+				hwc->config,
+				j,
+				assign ? 'y' : 'n');
+#endif
+
+			if (assign)
+				assign[i] = j;
+			num--;
+		}
+	}
+	/*
+	 * scheduling failed or is just a simulation,
+	 * free resources if necessary
+	 */
+	if (!assign || num) {
+		for (i = 0; i < n; i++) {
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+		}
+	}
+	return num ? -ENOSPC : 0;
+}
+
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+	struct perf_event *event;
+	int n, max_count;
+
+	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+
+	/* current number of events already accepted */
+	n = cpuc->n_events;
+
+	if (is_x86_event(leader)) {
+		if (n >= max_count)
+			return -ENOSPC;
+		cpuc->event_list[n] = leader;
+		n++;
+	}
+	if (!dogrp)
+		return n;
+
+	list_for_each_entry(event, &leader->sibling_list, group_entry) {
+		if (!is_x86_event(event) ||
+		    event->state == PERF_EVENT_STATE_OFF)
+			continue;
+
+		if (n >= max_count)
+			return -ENOSPC;
+
+		cpuc->event_list[n] = event;
+		n++;
+	}
+	return n;
+}
+
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+				struct hw_perf_event *hwc, int idx)
+{
+	hwc->idx = idx;
+
+	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+		hwc->config_base = 0;
+		hwc->event_base	= 0;
+	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->event_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+	} else {
+		hwc->config_base = x86_pmu.eventsel;
+		hwc->event_base  = x86_pmu.perfctr;
+	}
+}
+
 void hw_perf_enable(void)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int i;
+
 	if (!x86_pmu_initialized())
 		return;
+	if (cpuc->n_added) {
+		/*
+		 * apply assignment obtained either from
+		 * hw_perf_group_sched_in() or x86_pmu_enable()
+		 *
+		 * step1: save events moving to new counters
+		 * step2: reprogram moved events into new counters
+		 */
+		for (i = 0; i < cpuc->n_events; i++) {
+
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+				continue;
+
+			x86_pmu.disable(hwc, hwc->idx);
+
+			clear_bit(hwc->idx, cpuc->active_mask);
+			barrier();
+			cpuc->events[hwc->idx] = NULL;
+
+			x86_perf_event_update(event, hwc, hwc->idx);
+
+			hwc->idx = -1;
+		}
+
+		for (i = 0; i < cpuc->n_events; i++) {
+
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			if (hwc->idx == -1) {
+				x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+				x86_perf_event_set_period(event, hwc, hwc->idx);
+			}
+			/*
+			 * need to mark as active because x86_pmu_disable()
+			 * clear active_mask and eventsp[] yet it preserves
+			 * idx
+			 */
+			set_bit(hwc->idx, cpuc->active_mask);
+			cpuc->events[hwc->idx] = event;
+
+			x86_pmu.enable(hwc, hwc->idx);
+			perf_event_update_userpage(event);
+		}
+		cpuc->n_added = 0;
+		perf_events_lapic_init();
+	}
 	x86_pmu.enable_all();
 }
 
@@ -1391,148 +1649,43 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		x86_pmu_enable_event(hwc, idx);
 }
 
-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
-	unsigned int hw_event;
-
-	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (hwc->sample_period == 1)))
-		return X86_PMC_IDX_FIXED_BTS;
-
-	if (!x86_pmu.num_events_fixed)
-		return -1;
-
-	/*
-	 * fixed counters do not take all possible filters
-	 */
-	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
-		return -1;
-
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
-		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
-		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
-		return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
-	return -1;
-}
-
 /*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
-	return idx == x86_pmu.num_events ? -1 : idx;
-}
-
-/*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	const struct event_constraint *event_constraint;
-	int i, code;
-
-	if (!event_constraints)
-		goto skip;
-
-	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
-	for_each_event_constraint(event_constraint, event_constraints) {
-		if (code == event_constraint->code) {
-			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_and_set_bit(i, cpuc->used_mask))
-					return i;
-			}
-			return -1;
-		}
-	}
-skip:
-	return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = fixed_mode_idx(hwc);
-	if (idx == X86_PMC_IDX_FIXED_BTS) {
-		/* BTS is already occupied. */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			return -EAGAIN;
-
-		hwc->config_base	= 0;
-		hwc->event_base		= 0;
-		hwc->idx		= idx;
-	} else if (idx >= 0) {
-		/*
-		 * Try to get the fixed event, if that is already taken
-		 * then try to get a generic event:
-		 */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			goto try_generic;
-
-		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		/*
-		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
-		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-		 */
-		hwc->event_base =
-			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
-		hwc->idx = idx;
-	} else {
-		idx = hwc->idx;
-		/* Try to get the previous generic event again */
-		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
-			idx = x86_pmu.get_event_idx(cpuc, hwc);
-			if (idx == -1)
-				return -EAGAIN;
-
-			set_bit(idx, cpuc->used_mask);
-			hwc->idx = idx;
-		}
-		hwc->config_base = x86_pmu.eventsel;
-		hwc->event_base  = x86_pmu.perfctr;
-	}
-
-	return idx;
-}
-
-/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
  */
 static int x86_pmu_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
+	struct hw_perf_event *hwc;
+	int assign[X86_PMC_IDX_MAX];
+	int n, n0, ret;
 
-	idx = x86_schedule_event(cpuc, hwc);
-	if (idx < 0)
-		return idx;
+	hwc = &event->hw;
 
-	perf_events_lapic_init();
+	n0 = cpuc->n_events;
+	n = collect_events(cpuc, event, false);
+	if (n < 0)
+		return n;
 
-	x86_pmu.disable(hwc, idx);
+	ret = x86_schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
+	/*
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	cpuc->n_events = n;
+	cpuc->n_added  = n - n0;
 
-	x86_perf_event_set_period(event, hwc, idx);
-	x86_pmu.enable(hwc, idx);
-
-	perf_event_update_userpage(event);
+	if (hwc->idx != -1)
+		x86_perf_event_set_period(event, hwc, hwc->idx);
 
 	return 0;
 }
@@ -1576,7 +1729,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	}
-	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+	pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1664,7 +1817,7 @@ static void x86_pmu_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
+	int i, idx = hwc->idx;
 
 	/*
 	 * Must be done before we disable, otherwise the nmi handler
@@ -1690,8 +1843,19 @@ static void x86_pmu_disable(struct perf_event *event)
 		intel_pmu_drain_bts_buffer(cpuc);
 
 	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
 
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (event == cpuc->event_list[i]) {
+
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, event);
+
+			while (++i < cpuc->n_events)
+				cpuc->event_list[i-1] = cpuc->event_list[i];
+
+			--cpuc->n_events;
+		}
+	}
 	perf_event_update_userpage(event);
 }
 
@@ -1962,6 +2126,176 @@ perf_event_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
+static struct event_constraint bts_constraint = {
+	.code = 0,
+	.cmask = 0,
+	.idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS
+};
+
+static int intel_special_constraints(struct perf_event *event,
+				     u64 *idxmsk)
+{
+	unsigned int hw_event;
+
+	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (event->hw.sample_period == 1))) {
+
+		bitmap_copy((unsigned long *)idxmsk,
+			    (unsigned long *)bts_constraint.idxmsk,
+			    X86_PMC_IDX_MAX);
+		return 1;
+	}
+	return 0;
+}
+
+static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event,
+					u64 *idxmsk)
+{
+	const struct event_constraint *c;
+
+	/*
+	 * cleanup bitmask
+	 */
+	bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX);
+
+	if (intel_special_constraints(event, idxmsk))
+		return;
+
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if ((event->hw.config & c->cmask) == c->code) {
+
+				bitmap_copy((unsigned long *)idxmsk,
+					    (unsigned long *)c->idxmsk,
+					    X86_PMC_IDX_MAX);
+				return;
+			}
+		}
+	}
+	/* no constraints, means supports all generic counters */
+	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
+}
+
+static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event,
+				      u64 *idxmsk)
+{
+}
+
+static int x86_event_sched_in(struct perf_event *event,
+			  struct perf_cpu_context *cpuctx, int cpu)
+{
+	int ret = 0;
+
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = cpu;
+	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+
+	if (!is_x86_event(event))
+		ret = event->pmu->enable(event);
+
+	if (!ret && !is_software_event(event))
+		cpuctx->active_oncpu++;
+
+	if (!ret && event->attr.exclusive)
+		cpuctx->exclusive = 1;
+
+	return ret;
+}
+
+static void x86_event_sched_out(struct perf_event *event,
+			    struct perf_cpu_context *cpuctx, int cpu)
+{
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	event->oncpu = -1;
+
+	if (!is_x86_event(event))
+		event->pmu->disable(event);
+
+	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
+
+	if (!is_software_event(event))
+		cpuctx->active_oncpu--;
+
+	if (event->attr.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx, int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct perf_event *sub;
+	int assign[X86_PMC_IDX_MAX];
+	int n0, n1, ret;
+
+	/* n0 = total number of events */
+	n0 = collect_events(cpuc, leader, true);
+	if (n0 < 0)
+		return n0;
+
+	ret = x86_schedule_events(cpuc, n0, assign);
+	if (ret)
+		return ret;
+
+	ret = x86_event_sched_in(leader, cpuctx, cpu);
+	if (ret)
+		return ret;
+
+	n1 = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state != PERF_EVENT_STATE_OFF) {
+			ret = x86_event_sched_in(sub, cpuctx, cpu);
+			if (ret)
+				goto undo;
+			++n1;
+		}
+	}
+	/*
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n0*sizeof(int));
+
+	cpuc->n_events  = n0;
+	cpuc->n_added   = n1;
+	ctx->nr_active += n1;
+
+	/*
+	 * 1 means successful and events are active
+	 * This is not quite true because we defer
+	 * actual activation until hw_perf_enable() but
+	 * this way we* ensure caller won't try to enable
+	 * individual events
+	 */
+	return 1;
+undo:
+	x86_event_sched_out(leader, cpuctx, cpu);
+	n0  = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+			x86_event_sched_out(sub, cpuctx, cpu);
+			if (++n0 == n1)
+				break;
+		}
+	}
+	return ret;
+}
+
 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
@@ -1993,7 +2327,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	 */
 	.event_bits		= 32,
 	.event_mask		= (1ULL << 32) - 1,
-	.get_event_idx		= intel_get_event_idx,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_p6_event_constraints
 };
 
 static __initconst struct x86_pmu intel_pmu = {
@@ -2017,7 +2352,7 @@ static __initconst struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_idx		= intel_get_event_idx,
+	.get_event_constraints	= intel_get_event_constraints
 };
 
 static __initconst struct x86_pmu amd_pmu = {
@@ -2038,7 +2373,7 @@ static __initconst struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_idx		= gen_get_event_idx,
+	.get_event_constraints	= amd_get_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2051,12 +2386,9 @@ static __init int p6_pmu_init(void)
 	case 7:
 	case 8:
 	case 11: /* Pentium III */
-		event_constraints = intel_p6_event_constraints;
-		break;
 	case 9:
 	case 13:
 		/* Pentium M */
-		event_constraints = intel_p6_event_constraints;
 		break;
 	default:
 		pr_cont("unsupported p6 CPU model %d ",
@@ -2121,23 +2453,29 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		x86_pmu.event_constraints = intel_core_event_constraints;
 		pr_cont("Core2 events, ");
-		event_constraints = intel_core_event_constraints;
 		break;
-	default:
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		event_constraints = intel_nehalem_event_constraints;
+		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
+	default:
+		/*
+		 * default constraints for v2 and up
+		 */
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("generic architected perfmon, ");
 	}
 	return 0;
 }
@@ -2234,36 +2572,43 @@ static const struct pmu pmu = {
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	struct hw_perf_event fake_event = event->hw;
-
-	if (event->pmu && event->pmu != &pmu)
-		return 0;
-
-	return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation include:
+ * 	- check events are compatible which each other
+ * 	- events do not compete for the same counter
+ * 	- number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
 static int validate_group(struct perf_event *event)
 {
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct perf_event *leader = event->group_leader;
+	struct cpu_hw_events fake_cpuc;
+	int n;
 
-	memset(&fake_pmu, 0, sizeof(fake_pmu));
+	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
 
-	if (!validate_event(&fake_pmu, leader))
+	/*
+	 * the event is not yet connected with its
+	 * siblings therefore we must first collect
+	 * existing siblings, then add the new event
+	 * before we can simulate the scheduling
+	 */
+	n = collect_events(&fake_cpuc, leader, true);
+	if (n < 0)
 		return -ENOSPC;
 
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
-			return -ENOSPC;
-	}
-
-	if (!validate_event(&fake_pmu, event))
+	fake_cpuc.n_events = n;
+	n = collect_events(&fake_cpuc, event, false);
+	if (n < 0)
 		return -ENOSPC;
 
-	return 0;
+	fake_cpuc.n_events = n;
+
+	return x86_schedule_events(&fake_cpuc, n, NULL);
 }
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)

From 8113070d6639d2245c6c79afb8df42cedab30540 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 21 Jan 2010 17:39:01 +0200
Subject: [PATCH 130/640] perf_events: Add fast-path to the rescheduling code

Implement correct fastpath scheduling, i.e., reuse previous assignment.

Signed-off-by: Stephane Eranian <eranian@google.com>
[ split from larger patch]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 91 +++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 995ac4ae379c..0bd23d01af34 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1244,6 +1244,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 					      constraints[i]);
 	}
 
+	/*
+	 * fastpath, try to reuse previous register
+	 */
+	for (i = 0, num = n; i < n; i++, num--) {
+		hwc = &cpuc->event_list[i]->hw;
+		c = (unsigned long *)constraints[i];
+
+		/* never assigned */
+		if (hwc->idx == -1)
+			break;
+
+		/* constraint still honored */
+		if (!test_bit(hwc->idx, c))
+			break;
+
+		/* not already used */
+		if (test_bit(hwc->idx, used_mask))
+			break;
+
+#if 0
+		pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
+			 smp_processor_id(),
+			 hwc->config,
+			 hwc->idx,
+			 assign ? 'y' : 'n');
+#endif
+
+		set_bit(hwc->idx, used_mask);
+		if (assign)
+			assign[i] = hwc->idx;
+	}
+	if (!num)
+		goto done;
+
+	/*
+	 * begin slow path
+	 */
+
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
 	/*
 	 * weight = number of possible counters
 	 *
@@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	if (x86_pmu.num_events_fixed)
 		wmax++;
 
-	num = n;
-	for (w = 1; num && w <= wmax; w++) {
+	for (w = 1, num = n; num && w <= wmax; w++) {
 		/* for each event */
-		for (i = 0; i < n; i++) {
+		for (i = 0; num && i < n; i++) {
 			c = (unsigned long *)constraints[i];
 			hwc = &cpuc->event_list[i]->hw;
 
@@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (weight != w)
 				continue;
 
-			/*
-			 * try to reuse previous assignment
-			 *
-			 * This is possible despite the fact that
-			 * events or events order may have changed.
-			 *
-			 * What matters is the level of constraints
-			 * of an event and this is constant for now.
-			 *
-			 * This is possible also because we always
-			 * scan from most to least constrained. Thus,
-			 * if a counter can be reused, it means no,
-			 * more constrained events, needed it. And
-			 * next events will either compete for it
-			 * (which cannot be solved anyway) or they
-			 * have fewer constraints, and they can use
-			 * another counter.
-			 */
-			j = hwc->idx;
-			if (j != -1 && !test_bit(j, used_mask))
-				goto skip;
-
 			for_each_bit(j, c, X86_PMC_IDX_MAX) {
 				if (!test_bit(j, used_mask))
 					break;
@@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 			if (j == X86_PMC_IDX_MAX)
 				break;
-skip:
-			set_bit(j, used_mask);
 
 #if 0
-			pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+			pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
 				smp_processor_id(),
 				hwc->config,
 				j,
 				assign ? 'y' : 'n');
 #endif
 
+			set_bit(j, used_mask);
+
 			if (assign)
 				assign[i] = j;
 			num--;
 		}
 	}
+done:
 	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
@@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
 		if (!is_x86_event(event) ||
-		    event->state == PERF_EVENT_STATE_OFF)
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)
@@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
 				      struct perf_event *event,
 				      u64 *idxmsk)
 {
+	/* no constraints, means supports all generic counters */
+	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 
 	n1 = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state != PERF_EVENT_STATE_OFF) {
+		if (sub->state > PERF_EVENT_STATE_OFF) {
 			ret = x86_event_sched_in(sub, cpuctx, cpu);
 			if (ret)
 				goto undo;
@@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event)
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
+	const struct pmu *tmp;
 	int err;
 
 	err = __hw_perf_event_init(event);
 	if (!err) {
+		/*
+		 * we temporarily connect event to its pmu
+		 * such that validate_group() can classify
+		 * it as an x86 event using is_x86_event()
+		 */
+		tmp = event->pmu;
+		event->pmu = &pmu;
+
 		if (event->group_leader != event)
 			err = validate_group(event);
+
+		event->pmu = tmp;
 	}
 	if (err) {
 		if (event->destroy)

From 502568d563bcc37ac505a83341c0c95b88c015a8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 14:35:46 +0100
Subject: [PATCH 131/640] perf_event: x86: Allocate the fake_cpuc

GCC was complaining the stack usage was too large, so allocate the
structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.411197266@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0bd23d01af34..7bd359a57839 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2606,10 +2606,13 @@ static const struct pmu pmu = {
 static int validate_group(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
-	struct cpu_hw_events fake_cpuc;
-	int n;
+	struct cpu_hw_events *fake_cpuc;
+	int ret, n;
 
-	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
+	ret = -ENOMEM;
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		goto out;
 
 	/*
 	 * the event is not yet connected with its
@@ -2617,18 +2620,24 @@ static int validate_group(struct perf_event *event)
 	 * existing siblings, then add the new event
 	 * before we can simulate the scheduling
 	 */
-	n = collect_events(&fake_cpuc, leader, true);
+	ret = -ENOSPC;
+	n = collect_events(fake_cpuc, leader, true);
 	if (n < 0)
-		return -ENOSPC;
+		goto out_free;
 
-	fake_cpuc.n_events = n;
-	n = collect_events(&fake_cpuc, event, false);
+	fake_cpuc->n_events = n;
+	n = collect_events(fake_cpuc, event, false);
 	if (n < 0)
-		return -ENOSPC;
+		goto out_free;
 
-	fake_cpuc.n_events = n;
+	fake_cpuc->n_events = n;
 
-	return x86_schedule_events(&fake_cpuc, n, NULL);
+	ret = x86_schedule_events(fake_cpuc, n, NULL);
+
+out_free:
+	kfree(fake_cpuc);
+out:
+	return ret;
 }
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)

From 81269a085669b5130058a0275aa7ba9f94abd1fa Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 14:55:22 +0100
Subject: [PATCH 132/640] perf_event: x86: Fixup constraints typing issue

Constraints gets defined an u64 but in long quantities and then cast to
long.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.504916780@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 7bd359a57839..7e181a5097ea 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1232,7 +1232,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	int i, j , w, num;
 	int weight, wmax;
 	unsigned long *c;
-	u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct hw_perf_event *hwc;
 
@@ -1249,7 +1249,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0, num = n; i < n; i++, num--) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = (unsigned long *)constraints[i];
+		c = constraints[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -1306,7 +1306,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	for (w = 1, num = n; num && w <= wmax; w++) {
 		/* for each event */
 		for (i = 0; num && i < n; i++) {
-			c = (unsigned long *)constraints[i];
+			c = constraints[i];
 			hwc = &cpuc->event_list[i]->hw;
 
 			weight = bitmap_weight(c, X86_PMC_IDX_MAX);

From c91e0f5da81c6f3a611a1bd6d0cca6717c90fdab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 15:25:59 +0100
Subject: [PATCH 133/640] perf_event: x86: Clean up some of the u64/long
 bitmask casting

We need this to be u64 for direct assigment, but the bitmask functions
all work on unsigned long, leading to cast heaven, solve this by using a
union.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.595961269@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 47 ++++++++++++++++----------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 7e181a5097ea..921bbf732e77 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -69,10 +69,11 @@ struct debug_store {
 	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
-#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
-
 struct event_constraint {
-	u64	idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)];
+	union {
+		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+		u64		idxmsk64[1];
+	};
 	int	code;
 	int	cmask;
 };
@@ -90,13 +91,14 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, n, m) { \
-	.code = (c),	\
-	.cmask = (m),	\
-	.idxmsk[0] = (n) }
+#define EVENT_CONSTRAINT(c, n, m) { 	\
+	{ .idxmsk64[0] = (n) },		\
+	.code = (c),			\
+	.cmask = (m),			\
+}
 
 #define EVENT_CONSTRAINT_END \
-	{ .code = 0, .cmask = 0, .idxmsk[0] = 0 }
+	EVENT_CONSTRAINT(0, 0, 0)
 
 #define for_each_event_constraint(e, c) \
 	for ((e) = (c); (e)->cmask; (e)++)
@@ -126,8 +128,11 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	void		(*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk);
-	void		(*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event);
+	void		(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event,
+						 unsigned long *idxmsk);
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
 	const struct event_constraint *event_constraints;
 };
 
@@ -2144,14 +2149,11 @@ perf_event_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
-static struct event_constraint bts_constraint = {
-	.code = 0,
-	.cmask = 0,
-	.idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS
-};
+static struct event_constraint bts_constraint =
+	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 
 static int intel_special_constraints(struct perf_event *event,
-				     u64 *idxmsk)
+				     unsigned long *idxmsk)
 {
 	unsigned int hw_event;
 
@@ -2171,14 +2173,14 @@ static int intel_special_constraints(struct perf_event *event,
 
 static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event,
-					u64 *idxmsk)
+					unsigned long *idxmsk)
 {
 	const struct event_constraint *c;
 
 	/*
 	 * cleanup bitmask
 	 */
-	bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX);
+	bitmap_zero(idxmsk, X86_PMC_IDX_MAX);
 
 	if (intel_special_constraints(event, idxmsk))
 		return;
@@ -2186,10 +2188,7 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code) {
-
-				bitmap_copy((unsigned long *)idxmsk,
-					    (unsigned long *)c->idxmsk,
-					    X86_PMC_IDX_MAX);
+				bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX);
 				return;
 			}
 		}
@@ -2200,10 +2199,10 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
 
 static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
 				      struct perf_event *event,
-				      u64 *idxmsk)
+				      unsigned long *idxmsk)
 {
 	/* no constraints, means supports all generic counters */
-	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
+	bitmap_fill(idxmsk, x86_pmu.num_events);
 }
 
 static int x86_event_sched_in(struct perf_event *event,

From 8433be1184e4f22c37d4b8ed36cde529a47882f4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 15:38:26 +0100
Subject: [PATCH 134/640] perf_event: x86: Reduce some overly long lines with
 some MACROs

Introduce INTEL_EVENT_CONSTRAINT and FIXED_EVENT_CONSTRAINT to reduce
some line length and typing work.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.688730371@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 68 +++++++++++++++++---------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 921bbf732e77..4d1ed101c10d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -97,6 +97,12 @@ struct cpu_hw_events {
 	.cmask = (m),			\
 }
 
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+
 #define EVENT_CONSTRAINT_END \
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -192,12 +198,12 @@ static u64 p6_pmu_raw_event(u64 hw_event)
 
 static struct event_constraint intel_p6_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK),	/* FLOPS */
-	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK),	/* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
 	EVENT_CONSTRAINT_END
 };
 
@@ -217,41 +223,41 @@ static const u64 intel_perfmon_event_map[] =
 
 static struct event_constraint intel_core_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
-	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
-	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */
-	EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */
-	EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */
-	EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
 	EVENT_CONSTRAINT_END
 };
 
 static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
-	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
-	EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
-	EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */
-	EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */
-	EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */
-	EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */
-	EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */
-	EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */
-	EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+	INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+	INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
+	INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
 static struct event_constraint intel_gen_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
-	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 

From 9f41699ed067fa695faff8e2e9981b2550abec62 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 15:59:29 +0100
Subject: [PATCH 135/640] bitops: Provide compile time HWEIGHT{8,16,32,64}

Provide compile time versions of hweight.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20100122155535.797688466@chello.nl>
[ Remove some whitespace damage while we are at it ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bitops.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c05a29cb9bb2..ba0fd1eb4af7 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -25,7 +25,7 @@
 static __inline__ int get_bitmask_order(unsigned int count)
 {
 	int order;
-	
+
 	order = fls(count);
 	return order;	/* We could be slightly more clever with -1 here... */
 }
@@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count)
 static __inline__ int get_count_order(unsigned int count)
 {
 	int order;
-	
+
 	order = fls(count) - 1;
 	if (count & (count - 1))
 		order++;
@@ -45,6 +45,20 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
+#define HWEIGHT8(w)			\
+      (	(!!((w) & (1ULL << 0))) +	\
+	(!!((w) & (1ULL << 1))) +	\
+	(!!((w) & (1ULL << 2))) +	\
+	(!!((w) & (1ULL << 3))) +	\
+	(!!((w) & (1ULL << 4))) +	\
+	(!!((w) & (1ULL << 5))) +	\
+	(!!((w) & (1ULL << 6))) +	\
+	(!!((w) & (1ULL << 7)))	)
+
+#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8(w >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate

From 63b146490befc027a7e0923e333269e68b20d380 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 16:32:17 +0100
Subject: [PATCH 136/640] perf_event: x86: Optimize the constraint searching
 bits

Instead of copying bitmasks around, pass pointers to the constraint
structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.887853503@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 75 +++++++++++++++-----------------
 1 file changed, 34 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4d1ed101c10d..092ad566734c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -134,12 +134,14 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	void		(*get_event_constraints)(struct cpu_hw_events *cpuc,
-						 struct perf_event *event,
-						 unsigned long *idxmsk);
+
+	struct event_constraint *
+			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
-	const struct event_constraint *event_constraints;
+	struct event_constraint *event_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1242,17 +1244,15 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	int i, j , w, num;
 	int weight, wmax;
-	unsigned long *c;
-	unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0; i < n; i++) {
-		x86_pmu.get_event_constraints(cpuc,
-					      cpuc->event_list[i],
-					      constraints[i]);
+		constraints[i] =
+		  x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 	}
 
 	/*
@@ -1267,7 +1267,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			break;
 
 		/* constraint still honored */
-		if (!test_bit(hwc->idx, c))
+		if (!test_bit(hwc->idx, c->idxmsk))
 			break;
 
 		/* not already used */
@@ -1320,11 +1320,11 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			c = constraints[i];
 			hwc = &cpuc->event_list[i]->hw;
 
-			weight = bitmap_weight(c, X86_PMC_IDX_MAX);
+			weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX);
 			if (weight != w)
 				continue;
 
-			for_each_bit(j, c, X86_PMC_IDX_MAX) {
+			for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
 				if (!test_bit(j, used_mask))
 					break;
 			}
@@ -2155,11 +2155,13 @@ perf_event_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
+static struct event_constraint unconstrained;
+
 static struct event_constraint bts_constraint =
 	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 
-static int intel_special_constraints(struct perf_event *event,
-				     unsigned long *idxmsk)
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
 {
 	unsigned int hw_event;
 
@@ -2169,46 +2171,34 @@ static int intel_special_constraints(struct perf_event *event,
 		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 		     (event->hw.sample_period == 1))) {
 
-		bitmap_copy((unsigned long *)idxmsk,
-			    (unsigned long *)bts_constraint.idxmsk,
-			    X86_PMC_IDX_MAX);
-		return 1;
+		return &bts_constraint;
 	}
-	return 0;
+	return NULL;
 }
 
-static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
-					struct perf_event *event,
-					unsigned long *idxmsk)
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	const struct event_constraint *c;
+	struct event_constraint *c;
 
-	/*
-	 * cleanup bitmask
-	 */
-	bitmap_zero(idxmsk, X86_PMC_IDX_MAX);
-
-	if (intel_special_constraints(event, idxmsk))
-		return;
+	c = intel_special_constraints(event);
+	if (c)
+		return c;
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if ((event->hw.config & c->cmask) == c->code) {
-				bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX);
-				return;
-			}
+			if ((event->hw.config & c->cmask) == c->code)
+				return c;
 		}
 	}
-	/* no constraints, means supports all generic counters */
-	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
+
+	return &unconstrained;
 }
 
-static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
-				      struct perf_event *event,
-				      unsigned long *idxmsk)
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	/* no constraints, means supports all generic counters */
-	bitmap_fill(idxmsk, x86_pmu.num_events);
+	return &unconstrained;
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2576,6 +2566,9 @@ void __init init_hw_perf_events(void)
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
+	unconstrained = (struct event_constraint)
+		EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);

From 272d30be622c9c6cbd514b1211ff359292001baa Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 16:32:17 +0100
Subject: [PATCH 137/640] perf_event: x86: Optimize constraint weight
 computation

Add a weight member to the constraint structure and avoid recomputing the
weight at runtime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155535.963944926@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 092ad566734c..2c22ce4fa784 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -23,6 +23,7 @@
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
+#include <linux/bitops.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -76,6 +77,7 @@ struct event_constraint {
 	};
 	int	code;
 	int	cmask;
+	int	weight;
 };
 
 struct cpu_hw_events {
@@ -95,6 +97,7 @@ struct cpu_hw_events {
 	{ .idxmsk64[0] = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
+	.weight = HWEIGHT64((u64)(n)),	\
 }
 
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
@@ -1242,8 +1245,7 @@ static inline int is_x86_event(struct perf_event *event)
 
 static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	int i, j , w, num;
-	int weight, wmax;
+	int i, j, w, num, wmax;
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct hw_perf_event *hwc;
@@ -1320,8 +1322,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			c = constraints[i];
 			hwc = &cpuc->event_list[i]->hw;
 
-			weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX);
-			if (weight != w)
+			if (c->weight != w)
 				continue;
 
 			for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {

From c933c1a603d5bf700ddce79216c1be0ec3bc0e6c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 16:40:12 +0100
Subject: [PATCH 138/640] perf_event: x86: Optimize the fast path a little more

Remove num from the fast path and save a few ops.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100122155536.056430539@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2c22ce4fa784..33c889ff21ae 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1245,9 +1245,9 @@ static inline int is_x86_event(struct perf_event *event)
 
 static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	int i, j, w, num, wmax;
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int i, j, w, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
@@ -1260,7 +1260,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	/*
 	 * fastpath, try to reuse previous register
 	 */
-	for (i = 0, num = n; i < n; i++, num--) {
+	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
 		c = constraints[i];
 
@@ -1288,7 +1288,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (!num)
+	if (i == n)
 		goto done;
 
 	/*

From 6c9687abeb24d5b7aae7db5be070c2139ad29e29 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Jan 2010 11:57:25 +0100
Subject: [PATCH 139/640] perf_event: x86: Optimize x86_pmu_disable()

x86_pmu_disable() removes the event from the cpuc->event_list[], however
since an event can only be on that list once, stop looking after we found
it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 33c889ff21ae..66de282ad2fb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1884,6 +1884,7 @@ static void x86_pmu_disable(struct perf_event *event)
 				cpuc->event_list[i-1] = cpuc->event_list[i];
 
 			--cpuc->n_events;
+			break;
 		}
 	}
 	perf_event_update_userpage(event);

From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 Jan 2010 08:39:39 +0100
Subject: [PATCH 140/640] perf, x86: Clean up event constraints code a bit

- Remove stray debug code
 - Improve ugly macros a bit
 - Remove some whitespace damage
 - (Also fix up some accumulated damage in perf_event.h)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Stephane Eranian <eranian@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 arch/x86/kernel/cpu/perf_event.c | 37 +++++++-------------------------
 include/linux/perf_event.h       | 24 ++++++++++-----------
 2 files changed, 19 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66de282ad2fb..fdbe24842271 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,24 +93,19 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, n, m) { 	\
+#define EVENT_CONSTRAINT(c, n, m) {	\
 	{ .idxmsk64[0] = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = HWEIGHT64((u64)(n)),	\
 }
 
-#define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define INTEL_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define FIXED_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
 
-#define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define EVENT_CONSTRAINT_END			EVENT_CONSTRAINT(0, 0, 0)
 
-#define EVENT_CONSTRAINT_END \
-	EVENT_CONSTRAINT(0, 0, 0)
-
-#define for_each_event_constraint(e, c) \
-	for ((e) = (c); (e)->cmask; (e)++)
+#define for_each_event_constraint(e, c)		for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
@@ -1276,14 +1271,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (test_bit(hwc->idx, used_mask))
 			break;
 
-#if 0
-		pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
-			 smp_processor_id(),
-			 hwc->config,
-			 hwc->idx,
-			 assign ? 'y' : 'n');
-#endif
-
 		set_bit(hwc->idx, used_mask);
 		if (assign)
 			assign[i] = hwc->idx;
@@ -1333,14 +1320,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (j == X86_PMC_IDX_MAX)
 				break;
 
-#if 0
-			pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
-				smp_processor_id(),
-				hwc->config,
-				j,
-				assign ? 'y' : 'n');
-#endif
-
 			set_bit(j, used_mask);
 
 			if (assign)
@@ -2596,9 +2575,9 @@ static const struct pmu pmu = {
  * validate a single event group
  *
  * validation include:
- * 	- check events are compatible which each other
- * 	- events do not compete for the same counter
- * 	- number of events <= number of counters
+ *	- check events are compatible which each other
+ *	- events do not compete for the same counter
+ *	- number of events <= number of counters
  *
  * validation ensures the group can be loaded onto the
  * PMU if it was the only group available.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 72b2615600d8..953c17731e0d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -290,7 +290,7 @@ struct perf_event_mmap_page {
 };
 
 #define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
-#define PERF_RECORD_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
 #define PERF_RECORD_MISC_USER			(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
@@ -356,8 +356,8 @@ enum perf_event_type {
 	 *	u64				stream_id;
 	 * };
 	 */
-	PERF_RECORD_THROTTLE		= 5,
-	PERF_RECORD_UNTHROTTLE		= 6,
+	PERF_RECORD_THROTTLE			= 5,
+	PERF_RECORD_UNTHROTTLE			= 6,
 
 	/*
 	 * struct {
@@ -371,10 +371,10 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u32				pid, tid;
+	 *	struct perf_event_header	header;
+	 *	u32				pid, tid;
 	 *
-	 * 	struct read_format		values;
+	 *	struct read_format		values;
 	 * };
 	 */
 	PERF_RECORD_READ			= 8,
@@ -412,7 +412,7 @@ enum perf_event_type {
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 * };
 	 */
-	PERF_RECORD_SAMPLE		= 9,
+	PERF_RECORD_SAMPLE			= 9,
 
 	PERF_RECORD_MAX,			/* non-ABI */
 };
@@ -752,8 +752,7 @@ extern int perf_max_events;
 extern const struct pmu *hw_perf_event_init(struct perf_event *event);
 
 extern void perf_event_task_sched_in(struct task_struct *task);
-extern void perf_event_task_sched_out(struct task_struct *task,
-					struct task_struct *next);
+extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
 extern void perf_event_task_tick(struct task_struct *task);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
@@ -853,8 +852,7 @@ extern int sysctl_perf_event_mlock;
 extern int sysctl_perf_event_sample_rate;
 
 extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count,
-				 void *record, int entry_size);
+extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
@@ -895,13 +893,13 @@ static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
 static inline void
-perf_bp_event(struct perf_event *event, void *data)		{ }
+perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
-static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }

From 2e8418736dff9c6fdadb2f87dcc2087cebf32167 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Jan 2010 15:58:43 +0100
Subject: [PATCH 141/640] perf_event: x86: Deduplicate the disable code

Share the meat of the x86_pmu_disable() code with hw_perf_enable().

Also remove the barrier() from that code, since I could not convince
myself we actually need it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fdbe24842271..07fa0c2faa09 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1401,6 +1401,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	}
 }
 
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+
 void hw_perf_enable(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1426,13 +1428,7 @@ void hw_perf_enable(void)
 			if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
 				continue;
 
-			x86_pmu.disable(hwc, hwc->idx);
-
-			clear_bit(hwc->idx, cpuc->active_mask);
-			barrier();
-			cpuc->events[hwc->idx] = NULL;
-
-			x86_perf_event_update(event, hwc, hwc->idx);
+			__x86_pmu_disable(event, cpuc);
 
 			hwc->idx = -1;
 		}
@@ -1822,11 +1818,10 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
 	event->pending_kill = POLL_IN;
 }
 
-static void x86_pmu_disable(struct perf_event *event)
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int i, idx = hwc->idx;
+	int idx = hwc->idx;
 
 	/*
 	 * Must be done before we disable, otherwise the nmi handler
@@ -1835,12 +1830,6 @@ static void x86_pmu_disable(struct perf_event *event)
 	clear_bit(idx, cpuc->active_mask);
 	x86_pmu.disable(hwc, idx);
 
-	/*
-	 * Make sure the cleared pointer becomes visible before we
-	 * (potentially) free the event:
-	 */
-	barrier();
-
 	/*
 	 * Drain the remaining delta count out of a event
 	 * that we are disabling:
@@ -1852,6 +1841,14 @@ static void x86_pmu_disable(struct perf_event *event)
 		intel_pmu_drain_bts_buffer(cpuc);
 
 	cpuc->events[idx] = NULL;
+}
+
+static void x86_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int i;
+
+	__x86_pmu_disable(event, cpuc);
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i]) {

From ed8777fc132e589d48a0ba854fdbb5d8203b58e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 Jan 2010 23:07:46 +0100
Subject: [PATCH 142/640] perf_events, x86: Fix event constraint masks

Since constraints are specified on the event number, not number
and unit mask shorten the constraint masks so that we'll
actually match something.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221121.967610372@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h |  2 +-
 arch/x86/kernel/cpu/perf_event.c  | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dbc082685d52..ff5ede128bae 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -49,7 +49,7 @@
 	 INTEL_ARCH_INV_MASK| \
 	 INTEL_ARCH_EDGE_MASK|\
 	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+	 INTEL_ARCH_EVTSEL_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 07fa0c2faa09..951213a51489 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -100,12 +100,17 @@ struct cpu_hw_events {
 	.weight = HWEIGHT64((u64)(n)),	\
 }
 
-#define INTEL_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-#define FIXED_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
-#define EVENT_CONSTRAINT_END			EVENT_CONSTRAINT(0, 0, 0)
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
 
-#define for_each_event_constraint(e, c)		for ((e) = (c); (e)->cmask; (e)++)
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c)	\
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu

From 1a6e21f791fe85b40a9ddbafe999ab8ccffc3f78 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 Jan 2010 23:07:47 +0100
Subject: [PATCH 143/640] perf_events, x86: Clean up hw_perf_*_all()
 implementation

Put the recursion avoidance code in the generic hook instead of
replicating it in each implementation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.057507285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 59 ++++++++------------------------
 1 file changed, 14 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 951213a51489..cf10839f20ea 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1099,15 +1099,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 static void p6_pmu_disable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1118,12 +1111,6 @@ static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@ -1135,17 +1122,6 @@ static void amd_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	/*
-	 * ensure we write the disable before we start disabling the
-	 * events proper, so that amd_pmu_enable_event() does the
-	 * right thing.
-	 */
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		u64 val;
 
@@ -1166,23 +1142,20 @@ void hw_perf_disable(void)
 	if (!x86_pmu_initialized())
 		return;
 
-	if (cpuc->enabled)
-		cpuc->n_added = 0;
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->n_added = 0;
+	cpuc->enabled = 0;
+	barrier();
 
 	x86_pmu.disable_all();
 }
 
 static void p6_pmu_enable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long val;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1193,12 +1166,6 @@ static void intel_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -1217,12 +1184,6 @@ static void amd_pmu_enable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
@@ -1417,6 +1378,10 @@ void hw_perf_enable(void)
 
 	if (!x86_pmu_initialized())
 		return;
+
+	if (cpuc->enabled)
+		return;
+
 	if (cpuc->n_added) {
 		/*
 		 * apply assignment obtained either from
@@ -1461,6 +1426,10 @@ void hw_perf_enable(void)
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
 	}
+
+	cpuc->enabled = 1;
+	barrier();
+
 	x86_pmu.enable_all();
 }
 

From 452a339a976e7f782c786eb3f73080401e2fa3a6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 Jan 2010 23:07:48 +0100
Subject: [PATCH 144/640] perf_events, x86: Implement Intel Westmere support

The new Intel documentation includes Westmere arch specific
event maps that are significantly different from the Nehalem
ones. Add support for this generation.

Found the CPUID model numbers on wikipedia.

Also ammend some Nehalem constraints, spotted those when looking
for the differences between Nehalem and Westmere.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.151865645@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 124 +++++++++++++++++++++++++++++--
 1 file changed, 117 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cf10839f20ea..3fac0bfc2dee 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -244,18 +244,26 @@ static struct event_constraint intel_core_event_constraints[] =
 
 static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
 	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
 	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-	INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-	INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
@@ -286,6 +294,97 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static __initconst u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2423,7 +2522,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_core_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
-	case 26:
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -2437,6 +2538,15 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
 	default:
 		/*
 		 * default constraints for v2 and up

From 18c01f8abff51e4910cc5ffb4b710e8c6eea60c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 27 Jan 2010 23:07:49 +0100
Subject: [PATCH 145/640] perf_events, x86: Remove spurious counter reset from
 x86_pmu_enable()

At enable time the counter might still have a ->idx pointing to
a previously occupied location that might now be taken by
another event. Resetting the counter at that location with data
from this event will destroy the other counter's count.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.261477183@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3fac0bfc2dee..518eb3e39577 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1762,9 +1762,6 @@ static int x86_pmu_enable(struct perf_event *event)
 	cpuc->n_events = n;
 	cpuc->n_added  = n - n0;
 
-	if (hwc->idx != -1)
-		x86_perf_event_set_period(event, hwc, hwc->idx);
-
 	return 0;
 }
 

From 75c9f3284a7ff957829f44baace82406a6354ceb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 29 Jan 2010 09:04:26 +0100
Subject: [PATCH 146/640] perf_events: Fix sample_period transfer on inherit

One problem with frequency driven counters is that we cannot
predict the rate at which they trigger, therefore we have to
start them at period=1, this causes a ramp up effect. However,
if we fail to propagate the stable state on fork each new child
will have to ramp up again. This can lead to significant
artifacts in sample data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: eranian@google.com
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264752266.4283.2121.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 251fb9552492..53dc2a362111 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5002,8 +5002,15 @@ inherit_event(struct perf_event *parent_event,
 	else
 		child_event->state = PERF_EVENT_STATE_OFF;
 
-	if (parent_event->attr.freq)
-		child_event->hw.sample_period = parent_event->hw.sample_period;
+	if (parent_event->attr.freq) {
+		u64 sample_period = parent_event->hw.sample_period;
+		struct hw_perf_event *hwc = &child_event->hw;
+
+		hwc->sample_period = sample_period;
+		hwc->last_period   = sample_period;
+
+		atomic64_set(&hwc->period_left, sample_period);
+	}
 
 	child_event->overflow_handler = parent_event->overflow_handler;
 

From 72b8fa1730207274f6818b47b891ce5dff79287e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 27 Jan 2010 21:05:49 -0200
Subject: [PATCH 147/640] perf top: Exit if specified --vmlinux can't be used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As we do lazy loading of symtabs we only will know if the
specified vmlinux file is invalid when we actually have a hit in
kernel space and then try to load it. So if we get kernel hits
and there are _no_ symbols in the DSO backing the kernel map,
bail out.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264633557-17597-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2227b84aa002..78f9c4576a0c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -951,9 +951,31 @@ static void event__process_sample(const event_t *self,
 	}
 
 	if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
-	    al.sym == NULL || al.filtered)
+	    al.filtered)
 		return;
 
+	if (al.sym == NULL) {
+		/*
+		 * As we do lazy loading of symtabs we only will know if the
+		 * specified vmlinux file is invalid when we actually have a
+		 * hit in kernel space and then try to load it. So if we get
+		 * here and there are _no_ symbols in the DSO backing the
+		 * kernel map, bail out.
+		 *
+		 * We may never get here, for instance, if we use -K/
+		 * --hide-kernel-symbols, even if the user specifies an
+		 * invalid --vmlinux ;-)
+		 */
+		if (al.map == session->vmlinux_maps[MAP__FUNCTION] &&
+		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+			pr_err("The %s file can't be used\n",
+			       symbol_conf.vmlinux_name);
+			exit(1);
+		}
+
+		return;
+	}
+
 	syme = symbol__priv(al.sym);
 	if (!syme->skip) {
 		syme->count[counter]++;

From a19afe46412452fef89cc623873a8931b3685944 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 27 Jan 2010 21:05:50 -0200
Subject: [PATCH 148/640] perf symbols: Factor out dso__load_vmlinux_path()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So that we can call it directly from regression tests, and also
to reduce the size of dso__load_kernel_sym(), making it more
clear.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264633557-17597-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 38 ++++++++++++++++++++++++--------------
 tools/perf/util/symbol.h |  2 ++
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f1f609dcf9a1..26ec603083e0 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1578,6 +1578,27 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
 	return err;
 }
 
+int dso__load_vmlinux_path(struct dso *self, struct map *map,
+			   struct perf_session *session, symbol_filter_t filter)
+{
+	int i, err = 0;
+
+	pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+		 vmlinux_path__nr_entries);
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+		err = dso__load_vmlinux(self, map, session, vmlinux_path[i],
+					filter);
+		if (err > 0) {
+			pr_debug("Using %s for symbols\n", vmlinux_path[i]);
+			dso__set_long_name(self, strdup(vmlinux_path[i]));
+			break;
+		}
+	}
+
+	return err;
+}
+
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
 				struct perf_session *session, symbol_filter_t filter)
 {
@@ -1606,20 +1627,9 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	}
 
 	if (vmlinux_path != NULL) {
-		int i;
-		pr_debug("Looking at the vmlinux_path (%d entries long)\n",
-			 vmlinux_path__nr_entries);
-		for (i = 0; i < vmlinux_path__nr_entries; ++i) {
-			err = dso__load_vmlinux(self, map, session,
-						vmlinux_path[i], filter);
-			if (err > 0) {
-				pr_debug("Using %s for symbols\n",
-					 vmlinux_path[i]);
-				dso__set_long_name(self,
-						   strdup(vmlinux_path[i]));
-				goto out_fixup;
-			}
-		}
+		err = dso__load_vmlinux_path(self, map, session, filter);
+		if (err > 0)
+			goto out_fixup;
 	}
 
 	/*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ffe0b0f2e5d3..a94997aeb334 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -129,6 +129,8 @@ struct perf_session;
 
 int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 	      symbol_filter_t filter);
+int dso__load_vmlinux_path(struct dso *self, struct map *map,
+			   struct perf_session *session, symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
 size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
 

From fd1d908c543fbdfae82839d24b0872c542fceedc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 27 Jan 2010 21:05:51 -0200
Subject: [PATCH 149/640] perf symbols: Split helpers used when creating kernel
 dso object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make it clear and allow for direct usage by, for instance,
regression test suites.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264633557-17597-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 28 +++++++++++++++++++++-------
 tools/perf/util/symbol.h |  2 ++
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 26ec603083e0..f9049d12ead6 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1762,24 +1762,38 @@ size_t dsos__fprintf_buildid(FILE *fp, bool with_hits)
 		__dsos__fprintf_buildid(&dsos__user, fp, with_hits));
 }
 
+struct dso *dso__new_kernel(const char *name)
+{
+	struct dso *self = dso__new(name ?: "[kernel.kallsyms]");
+
+	if (self != NULL) {
+		self->short_name = "[kernel]";
+		self->kernel	 = 1;
+	}
+
+	return self;
+}
+
+void dso__read_running_kernel_build_id(struct dso *self)
+{
+	if (sysfs__read_build_id("/sys/kernel/notes", self->build_id,
+				 sizeof(self->build_id)) == 0)
+		self->has_build_id = true;
+}
+
 static struct dso *dsos__create_kernel(const char *vmlinux)
 {
-	struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
+	struct dso *kernel = dso__new_kernel(vmlinux);
 
 	if (kernel == NULL)
 		return NULL;
 
-	kernel->short_name = "[kernel]";
-	kernel->kernel	   = 1;
-
 	vdso = dso__new("[vdso]");
 	if (vdso == NULL)
 		goto out_delete_kernel_dso;
 	dso__set_loaded(vdso, MAP__FUNCTION);
 
-	if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
-				 sizeof(kernel->build_id)) == 0)
-		kernel->has_build_id = true;
+	dso__read_running_kernel_build_id(kernel);
 
 	dsos__add(&dsos__kernel, kernel);
 	dsos__add(&dsos__user, vdso);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index a94997aeb334..124302778c09 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -109,6 +109,7 @@ struct dso {
 };
 
 struct dso *dso__new(const char *name);
+struct dso *dso__new_kernel(const char *name);
 void dso__delete(struct dso *self);
 
 bool dso__loaded(const struct dso *self, enum map_type type);
@@ -139,6 +140,7 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
 char dso__symtab_origin(const struct dso *self);
 void dso__set_long_name(struct dso *self, char *name);
 void dso__set_build_id(struct dso *self, void *build_id);
+void dso__read_running_kernel_build_id(struct dso *self);
 struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
 					const char *name);

From 64abebf731df87e6f4ae7d9ffc340bdf0c033e44 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 27 Jan 2010 21:05:52 -0200
Subject: [PATCH 150/640] perf session: Create kernel maps in the constructor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removing one extra step needed in the tools that need this,
fixing a bug in 'perf probe' where this was not being done.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1264633557-17597-4-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c   |  5 -----
 tools/perf/builtin-record.c |  5 -----
 tools/perf/builtin-top.c    |  5 -----
 tools/perf/util/session.c   | 13 +++++++++++--
 4 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 7323d9dfbce8..38b8ca900eda 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -491,11 +491,6 @@ static int __cmd_kmem(void)
 	if (!perf_session__has_traces(session, "kmem record"))
 		goto out_delete;
 
-	if (perf_session__create_kernel_maps(session) < 0) {
-		pr_err("Problems creating kernel maps\n");
-		return -1;
-	}
-
 	setup_pager();
 	err = perf_session__process_events(session, &event_ops);
 	if (err != 0)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bb9ca1b30fa..90345223908c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -477,11 +477,6 @@ static int __cmd_record(int argc, const char **argv)
 		return -1;
 	}
 
-	if (perf_session__create_kernel_maps(session) < 0) {
-		pr_err("Problems creating kernel maps\n");
-		return -1;
-	}
-
 	if (!file_new) {
 		err = perf_header__read(&session->header, output);
 		if (err < 0)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 78f9c4576a0c..1fc018e048e1 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1191,11 +1191,6 @@ static int __cmd_top(void)
 	if (session == NULL)
 		return -ENOMEM;
 
-	if (perf_session__create_kernel_maps(session) < 0) {
-		pr_err("Problems creating kernel maps\n");
-		return -1;
-	}
-
 	if (target_pid != -1)
 		event__synthesize_thread(target_pid, event__process, session);
 	else
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1951e330377c..8e7c1896eaa2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -70,8 +70,17 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
 	self->unknown_events = 0;
 	map_groups__init(&self->kmaps);
 
-	if (mode == O_RDONLY && perf_session__open(self, force) < 0)
-		goto out_delete;
+	if (mode == O_RDONLY) {
+		if (perf_session__open(self, force) < 0)
+			goto out_delete;
+	} else if (mode == O_WRONLY) {
+		/*
+		 * In O_RDONLY mode this will be performed when reading the
+		 * kernel MMAP event, in event__process_mmap().
+		 */
+		if (perf_session__create_kernel_maps(self) < 0)
+			goto out_delete;
+	}
 
 	self->sample_type = perf_header__sample_type(&self->header);
 out:

From 66ddfc62caec65a25fd5a8b20f535a2958ee94e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 28 Jan 2010 20:50:39 +0100
Subject: [PATCH 151/640] mx35: add a missing comma in a pad definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported-by: Tim Sander <tstone@vlsi.informatik.tu-darmstadt.de>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/plat-mxc/include/mach/iomux-mx35.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx35.h b/arch/arm/plat-mxc/include/mach/iomux-mx35.h
index 00b0ac1db225..c88d40795f7a 100644
--- a/arch/arm/plat-mxc/include/mach/iomux-mx35.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx35.h
@@ -671,7 +671,7 @@
 #define MX35_PAD_LD8__SDMA_SDMA_DEBUG_PC_8			IOMUX_PAD(0x634, 0x1d0, 6, 0x0,   0, NO_PAD_CTRL)
 
 #define MX35_PAD_LD9__IPU_DISPB_DAT_9				IOMUX_PAD(0x638, 0x1d4, 0, 0x0,   0, NO_PAD_CTRL)
-#define MX35_PAD_LD9__GPIO2_9					IOMUX_PAD(0x638, 0x1d4, 5, 0x8e4  0, NO_PAD_CTRL)
+#define MX35_PAD_LD9__GPIO2_9					IOMUX_PAD(0x638, 0x1d4, 5, 0x8e4, 0, NO_PAD_CTRL)
 #define MX35_PAD_LD9__SDMA_SDMA_DEBUG_PC_9			IOMUX_PAD(0x638, 0x1d4, 6, 0x0,   0, NO_PAD_CTRL)
 
 #define MX35_PAD_LD10__IPU_DISPB_DAT_10				IOMUX_PAD(0x63c, 0x1d8, 0, 0x0,   0, NO_PAD_CTRL)

From 4c574159d03f4d8a136a7adff2d0b1d82cadcb18 Mon Sep 17 00:00:00 2001
From: Thiago Farina <tfransosi@gmail.com>
Date: Wed, 27 Jan 2010 21:05:55 -0200
Subject: [PATCH 152/640] tools/perf/perf.c: Clean up trivial style issues

Checked with:
./../scripts/checkpatch.pl --terse --file perf.c

 perf.c: 51: ERROR: open brace '{' following function declarations go on the next line
 perf.c: 73: ERROR: "foo*** bar" should be "foo ***bar"
 perf.c:112: ERROR: space prohibited before that close parenthesis ')'
 perf.c:127: ERROR: space prohibited before that close parenthesis ')'
 perf.c:171: ERROR: "foo** bar" should be "foo **bar"
 perf.c:213: ERROR: "(foo*)" should be "(foo *)"
 perf.c:216: ERROR: "(foo*)" should be "(foo *)"
 perf.c:217: ERROR: space required before that '*' (ctx:OxV)
 perf.c:452: ERROR: do not initialise statics to 0 or NULL
 perf.c:453: ERROR: do not initialise statics to 0 or NULL

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
LKML-Reference: <1264633557-17597-7-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/perf.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 05c861c045d5..109b89b30ced 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -48,7 +48,8 @@ int check_pager_config(const char *cmd)
 	return c.val;
 }
 
-static void commit_pager_choice(void) {
+static void commit_pager_choice(void)
+{
 	switch (use_pager) {
 	case 0:
 		setenv("PERF_PAGER", "cat", 1);
@@ -70,7 +71,7 @@ static void set_debugfs_path(void)
 		 "tracing/events");
 }
 
-static int handle_options(const char*** argv, int* argc, int* envchanged)
+static int handle_options(const char ***argv, int *argc, int *envchanged)
 {
 	int handled = 0;
 
@@ -109,7 +110,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--perf-dir")) {
 			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --perf-dir.\n" );
+				fprintf(stderr, "No directory given for --perf-dir.\n");
 				usage(perf_usage_string);
 			}
 			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
@@ -124,7 +125,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--work-tree")) {
 			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --work-tree.\n" );
+				fprintf(stderr, "No directory given for --work-tree.\n");
 				usage(perf_usage_string);
 			}
 			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
@@ -168,7 +169,7 @@ static int handle_alias(int *argcp, const char ***argv)
 {
 	int envchanged = 0, ret = 0, saved_errno = errno;
 	int count, option_count;
-	const char** new_argv;
+	const char **new_argv;
 	const char *alias_command;
 	char *alias_string;
 
@@ -210,11 +211,11 @@ static int handle_alias(int *argcp, const char ***argv)
 		if (!strcmp(alias_command, new_argv[0]))
 			die("recursive alias: %s", alias_command);
 
-		new_argv = realloc(new_argv, sizeof(char*) *
+		new_argv = realloc(new_argv, sizeof(char *) *
 				    (count + *argcp + 1));
 		/* insert after command name */
-		memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp);
-		new_argv[count+*argcp] = NULL;
+		memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp);
+		new_argv[count + *argcp] = NULL;
 
 		*argv = new_argv;
 		*argcp += count - 1;
@@ -450,8 +451,8 @@ int main(int argc, const char **argv)
 	setup_path();
 
 	while (1) {
-		static int done_help = 0;
-		static int was_alias = 0;
+		static int done_help;
+		static int was_alias;
 
 		was_alias = run_argv(&argc, &argv);
 		if (errno != ENOENT)

From 6a1b751fb89b61ef7240f2e3ed65a2e2776e7cfd Mon Sep 17 00:00:00 2001
From: John Kacur <jkacur@redhat.com>
Date: Wed, 27 Jan 2010 21:05:54 -0200
Subject: [PATCH 153/640] perf: Ignore perf-archive temp file

Tell git to ignore perf-archive.

Signed-off-by: John Kacur <jkacur@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264633557-17597-6-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 124760bb37b5..e1d60d780784 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -14,6 +14,7 @@ perf*.html
 common-cmds.h
 perf.data
 perf.data.old
+perf-archive
 tags
 TAGS
 cscope*

From 12b336a8b4ea8652372f88521bf55cef7f5d5283 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 27 Jan 2010 20:43:21 +0000
Subject: [PATCH 154/640] wm97xx_battery: Handle missing platform data
 gracefully

Don't unconditionally dereference the WM97xx core platform data since
it may not be present, causing an oops.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
---
 drivers/power/wm97xx_battery.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index fa39e759a275..6ea3cb5837c7 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -175,8 +175,14 @@ static int __devinit wm97xx_bat_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "Do not pass platform_data through "
 			"wm97xx_bat_set_pdata!\n");
 		return -EINVAL;
-	} else
-		pdata = wmdata->batt_pdata;
+	}
+
+	if (!wmdata) {
+		dev_err(&dev->dev, "No platform data supplied\n");
+		return -EINVAL;
+	}
+
+	pdata = wmdata->batt_pdata;
 
 	if (dev->id != -1)
 		return -EINVAL;

From 7415c7602ddb14a9a9c39bc8c38afa667092a527 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 29 Jan 2010 06:51:08 -0300
Subject: [PATCH 155/640] V4L/DVB: saa7134: remove stray unlock_kernel

An earlier commit removed the lock_kernel/unlock_kernel pair but forgot
to remove the unlock_kernel call in the cleanup path at the end of the
function.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/saa7134/saa7134-empress.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 7dfecfc6017c..ee5bff02a92c 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -93,9 +93,9 @@ static int ts_open(struct file *file)
 	dprintk("open dev=%s\n", video_device_node_name(vdev));
 	err = -EBUSY;
 	if (!mutex_trylock(&dev->empress_tsq.vb_lock))
-		goto done;
+		return err;
 	if (atomic_read(&dev->empress_users))
-		goto done_up;
+		goto done;
 
 	/* Unmute audio */
 	saa_writeb(SAA7134_AUDIO_MUTE_CTRL,
@@ -105,10 +105,8 @@ static int ts_open(struct file *file)
 	file->private_data = dev;
 	err = 0;
 
-done_up:
-	mutex_unlock(&dev->empress_tsq.vb_lock);
 done:
-	unlock_kernel();
+	mutex_unlock(&dev->empress_tsq.vb_lock);
 	return err;
 }
 

From 9198bcd39f558dd56823f1c9983e2252fc99a501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 29 Jan 2010 14:20:05 -0800
Subject: [PATCH 156/640] omap: define _toggle_gpio_edge_triggering only for
 OMAP1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only usage of _toggle_gpio_edge_triggering is in
an #ifdef CONFIG_ARCH_OMAP1 block, so only provide it if
CONFIG_ARCH_OMAP1 is defined, too.

This fixes a compiler warning:

	arch/arm/plat-omap/gpio.c:758: warning: '_toggle_gpio_edge_triggering' defined but not used

when compiling for ARCH_OMAP2, ARCH_OMAP3 or ARCH_OMAP4.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/plat-omap/gpio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index d17620c50c28..d2422c766cca 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -750,6 +750,7 @@ static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio,
 }
 #endif
 
+#ifdef CONFIG_ARCH_OMAP1
 /*
  * This only applies to chips that can't do both rising and falling edge
  * detection at once.  For all other chips, this function is a noop.
@@ -760,11 +761,9 @@ static void _toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio)
 	u32 l = 0;
 
 	switch (bank->method) {
-#ifdef CONFIG_ARCH_OMAP1
 	case METHOD_MPUIO:
 		reg += OMAP_MPUIO_GPIO_INT_EDGE;
 		break;
-#endif
 #ifdef CONFIG_ARCH_OMAP15XX
 	case METHOD_GPIO_1510:
 		reg += OMAP1510_GPIO_INT_CONTROL;
@@ -787,6 +786,7 @@ static void _toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio)
 
 	__raw_writel(l, reg);
 }
+#endif
 
 static int _set_gpio_triggering(struct gpio_bank *bank, int gpio, int trigger)
 {

From 643ced9b0b4810b5725910667604f1a373f30f2f Mon Sep 17 00:00:00 2001
From: Li Peng <peng.li@linux.intel.com>
Date: Thu, 28 Jan 2010 01:05:09 +0800
Subject: [PATCH 157/640] drm/i915: don't trigger ironlake vblank interrupt at
 irq install

Zhenyu noticed that the ironlake vblank enabling patch has one
issue that it will trigger vblank starting from irq postinstall,
this isn't necessary. This patch addresses this issue by only
adding the vblank into DEIER but mask them in DEIMR, so that it
won't trigger vblank interrupt at irq install.

Signed-off-by: Li Peng <peng.li@intel.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_irq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e7472d82132a..fcd87ad75fec 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1022,14 +1022,13 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
-			   DE_PIPEA_VBLANK | DE_PIPEB_VBLANK;
+	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT;
 	u32 render_mask = GT_USER_INTERRUPT;
 	u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
 			   SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
 
 	dev_priv->irq_mask_reg = ~display_mask;
-	dev_priv->de_irq_enable_reg = display_mask;
+	dev_priv->de_irq_enable_reg = display_mask | DE_PIPEA_VBLANK | DE_PIPEB_VBLANK;
 
 	/* should always can generate irq */
 	I915_WRITE(DEIIR, I915_READ(DEIIR));

From 2dd873838805a6e84c1afdfbf13e8709bfb5c70f Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Wed, 27 Jan 2010 16:32:46 +0800
Subject: [PATCH 158/640] drm/i915: Add support for SDVO composite TV

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_sdvo.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index eaacfd0920df..82678d30ab06 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2345,6 +2345,14 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
 		connector->connector_type = DRM_MODE_CONNECTOR_VGA;
 		intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
 					(1 << INTEL_ANALOG_CLONE_BIT);
+	} else if (flags & SDVO_OUTPUT_CVBS0) {
+
+		sdvo_priv->controlled_output = SDVO_OUTPUT_CVBS0;
+		encoder->encoder_type = DRM_MODE_ENCODER_TVDAC;
+		connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO;
+		sdvo_priv->is_tv = true;
+		intel_output->needs_tv_clock = true;
+		intel_output->clone_mask = 1 << INTEL_SDVO_TV_CLONE_BIT;
 	} else if (flags & SDVO_OUTPUT_LVDS0) {
 
 		sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;

From f034b12dbb5749b11e9390e15e93ffa87ece8038 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Thu, 21 Jan 2010 15:20:18 +0800
Subject: [PATCH 159/640] drm/i915: Fix the incorrect DMI string for Samsung
 SX20S laptop

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Reported-by: Philipp Kohlbecher <xt28@gmx.de>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_lvds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index aa74e59bec61..75a9772061cb 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -611,7 +611,7 @@ static const struct dmi_system_id bad_lid_status[] = {
 	{
 		.ident = "Samsung SX20S",
 		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"),
 			DMI_MATCH(DMI_BOARD_NAME, "SX20S"),
 		},
 	},

From 013d5aa2bbb2ceacba7a0dad7f2a0eb20133323f Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 29 Jan 2010 11:18:31 -0800
Subject: [PATCH 160/640] drm/i915: page flip support for Ironlake

This patch adds support for page flipping on Ironlake, which uses
different interrupt bits for triggering flip submit IRQs.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[anholt: hand-resolved for rebasing off of render power saving patch]
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_irq.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fcd87ad75fec..50ddf4a95c5e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -309,11 +309,21 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 	if (de_iir & DE_GSE)
 		ironlake_opregion_gse_intr(dev);
 
-	if (de_iir & DE_PIPEA_VBLANK)
-		drm_handle_vblank(dev, 0);
+	if (de_iir & DE_PLANEA_FLIP_DONE)
+		intel_prepare_page_flip(dev, 0);
 
-	if (de_iir & DE_PIPEB_VBLANK)
+	if (de_iir & DE_PLANEB_FLIP_DONE)
+		intel_prepare_page_flip(dev, 1);
+
+	if (de_iir & DE_PIPEA_VBLANK) {
+		drm_handle_vblank(dev, 0);
+		intel_finish_page_flip(dev, 0);
+	}
+
+	if (de_iir & DE_PIPEB_VBLANK) {
 		drm_handle_vblank(dev, 1);
+		intel_finish_page_flip(dev, 1);
+	}
 
 	/* check event from PCH */
 	if ((de_iir & DE_PCH_EVENT) &&
@@ -1022,7 +1032,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT;
+	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
+			   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE;
 	u32 render_mask = GT_USER_INTERRUPT;
 	u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
 			   SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;

From df2e615a3b3a66d0731e3309e9731970a6c51268 Mon Sep 17 00:00:00 2001
From: Colin Tuckley <colin.tuckley@arm.com>
Date: Fri, 29 Jan 2010 12:52:55 +0100
Subject: [PATCH 161/640] ARM: 5907/1: ARM: Fix the reset on the RealView PBX
 Development board

Signed-off-by: Colin Tuckley <colin.tuckley@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-realview/realview_pbx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index a21a4b395f73..d94857eb0690 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -334,8 +334,8 @@ static void realview_pbx_reset(char mode)
 	 * in the system FPGA
 	 */
 	__raw_writel(REALVIEW_SYS_LOCK_VAL, lock_ctrl);
-	__raw_writel(0x0000, reset_ctrl);
-	__raw_writel(0x0004, reset_ctrl);
+	__raw_writel(0x00F0, reset_ctrl);
+	__raw_writel(0x00F4, reset_ctrl);
 }
 
 static void __init realview_pbx_init(void)

From c540b9ff0f8679ba924fac072aeb7d63fa473190 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 26 Jan 2010 19:09:48 +0100
Subject: [PATCH 162/640] ARM: 5904/1: ARM: Always generate the IT instruction
 when compiling for Thumb-2

Current behaviour is to generate the IT instruction only for Thumb-2
code. However, the kernel helpers in entry-armv.S are compiled to ARM in
a unified syntax file (if THUMB2_KERNEL). Recent compilers warn about
missing IT instruction in unified assembly syntax files. The patch
changes the "-mimplicit-it" gas option to "always".

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 9e7582572741..356d702c0808 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -94,7 +94,7 @@ CFLAGS_ABI	+=-funwind-tables
 endif
 
 ifeq ($(CONFIG_THUMB2_KERNEL),y)
-AFLAGS_AUTOIT	:=$(call as-option,-Wa$(comma)-mimplicit-it=thumb,-Wa$(comma)-mauto-it)
+AFLAGS_AUTOIT	:=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
 AFLAGS_NOWARN	:=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
 CFLAGS_THUMB2	:=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
 AFLAGS_THUMB2	:=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb

From 3256a05531b1164a9c138da701b922a113bddf82 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sun, 31 Jan 2010 12:39:50 +0900
Subject: [PATCH 163/640] nilfs2: fix potential leak of dirty data on umount

This fixes incorrect usage of nilfs_segctor_confirm() test function in
nilfs_segctor_destroy(); nilfs_segctor_confirm() returns zero if the
filesystem is not clean, so its use in nilfs_segctor_destroy() needs
inversion.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 17584c524486..105b508b47a8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
 		|| sci->sc_seq_request != sci->sc_seq_done);
 	spin_unlock(&sci->sc_state_lock);
 
-	if (flag || nilfs_segctor_confirm(sci))
+	if (flag || !nilfs_segctor_confirm(sci))
 		nilfs_segctor_write_out(sci);
 
 	WARN_ON(!list_empty(&sci->sc_copied_buffers));

From a8e6f734ce9a79d44ebb296f2a341f435227b34e Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 30 Jan 2010 20:55:41 +0900
Subject: [PATCH 164/640] Revert "perf record: Intercept all events"

This reverts commit f5a2c3dce03621b55f84496f58adc2d1a87ca16f.

This patch is required for making "perf lock rec" work.
The commit f5a2c3dce0 changes write_event() of builtin-record.c
. And changed write_event() sometimes doesn't stop with perf
lock rec.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
[ that commit also causes perf record to not be Ctrl-C-able,
  and it's concetually wrong to parse the data at record time
  (unconditionally - even when not needed), as we eventually
  want to be able to do zero-copy recording, at least for
  non-archive recordings.  ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 90345223908c..eea56910b91c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -113,24 +113,16 @@ static void write_output(void *buf, size_t size)
 
 static void write_event(event_t *buf, size_t size)
 {
-	size_t processed_size = buf->header.size;
-	event_t *ev = buf;
-
-	do {
-		/*
-		* Add it to the list of DSOs, so that when we finish this
-		 * record session we can pick the available build-ids.
-		 */
-		if (ev->header.type == PERF_RECORD_MMAP) {
-			struct list_head *head = &dsos__user;
-			if (ev->header.misc == 1)
-				head = &dsos__kernel;
-			__dsos__findnew(head, ev->mmap.filename);
-		}
-
-		ev = ((void *)ev) + ev->header.size;
-		processed_size += ev->header.size;
-	} while (processed_size < size);
+	/*
+	* Add it to the list of DSOs, so that when we finish this
+	 * record session we can pick the available build-ids.
+	 */
+	if (buf->header.type == PERF_RECORD_MMAP) {
+		struct list_head *head = &dsos__user;
+		if (buf->mmap.header.misc == 1)
+			head = &dsos__kernel;
+		__dsos__findnew(head, buf->mmap.filename);
+	}
 
 	write_output(buf, size);
 }

From 86d8d29634de4464d568e7c335c0da6cba64e8ab Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 30 Jan 2010 20:43:23 +0900
Subject: [PATCH 165/640] perf tools: Add __data_loc support

This patch is required to test the next patch for perf lock.

At 064739bc4b3d7f424b2f25547e6611bcf0132415 ,
support for the modifier "__data_loc" of format is added.

But, when I wanted to parse format of lock_acquired (or some
event else), raw_field_ptr() did not returned correct pointer.

So I modified raw_field_ptr() like this patch. Then
raw_field_ptr() works well.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <srostedt@redhat.com>
LKML-Reference: <1264851813-8413-2-git-send-email-mitake@dcl.info.waseda.ac.jp>
[ v3: fixed minor stylistic detail ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/trace-event-parse.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index c5c32be040bf..c4b3cb8a02b1 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -1925,6 +1925,15 @@ void *raw_field_ptr(struct event *event, const char *name, void *data)
 	if (!field)
 		return NULL;
 
+	if (field->flags & FIELD_IS_STRING) {
+		int offset;
+
+		offset = *(int *)(data + field->offset);
+		offset &= 0xffff;
+
+		return data + offset;
+	}
+
 	return data + field->offset;
 }
 

From 18e97e06b5fb2d7f6cf272ca07d26d8247db8723 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 30 Jan 2010 20:43:24 +0900
Subject: [PATCH 166/640] perf: Add util/include/linuxhash.h to include hash.h
 of kernel

linux/hash.h, hash header of kernel, is also useful for perf.

util/include/linuxhash.h includes linux/hash.h, so we can use
hash facilities (e.g. hash_long()) in perf now.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264851813-8413-3-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile                  | 1 +
 tools/perf/util/include/linux/hash.h | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 tools/perf/util/include/linux/hash.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 9b173e66fb41..b2bce1fb4ae1 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -333,6 +333,7 @@ LIB_FILE=libperf.a
 LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/linux/hash.h
 LIB_H += ../../include/linux/stringify.h
 LIB_H += util/include/linux/bitmap.h
 LIB_H += util/include/linux/bitops.h
diff --git a/tools/perf/util/include/linux/hash.h b/tools/perf/util/include/linux/hash.h
new file mode 100644
index 000000000000..201f57397997
--- /dev/null
+++ b/tools/perf/util/include/linux/hash.h
@@ -0,0 +1,5 @@
+#include "../../../../include/linux/hash.h"
+
+#ifndef PERF_HASH_H
+#define PERF_HASH_H
+#endif

From c965be10ca3cb0bdd04016c852764afaf8e647c8 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 30 Jan 2010 20:43:32 +0900
Subject: [PATCH 167/640] perf lock: Enhance information of lock trace events

Add wait time and lock identification details.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264851813-8413-11-git-send-email-mitake@dcl.info.waseda.ac.jp>
[ removed the file/line bits as we can do that better via IPs ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/lock.h | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index a870ba125aa8..5c1dcfc16c60 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -20,14 +20,17 @@ TRACE_EVENT(lock_acquire,
 	TP_STRUCT__entry(
 		__field(unsigned int, flags)
 		__string(name, lock->name)
+		__field(void *, lockdep_addr)
 	),
 
 	TP_fast_assign(
 		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
 		__assign_str(name, lock->name);
+		__entry->lockdep_addr = lock;
 	),
 
-	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+	TP_printk("%p %s%s%s", __entry->lockdep_addr,
+		  (__entry->flags & 1) ? "try " : "",
 		  (__entry->flags & 2) ? "read " : "",
 		  __get_str(name))
 );
@@ -40,13 +43,16 @@ TRACE_EVENT(lock_release,
 
 	TP_STRUCT__entry(
 		__string(name, lock->name)
+		__field(void *, lockdep_addr)
 	),
 
 	TP_fast_assign(
 		__assign_str(name, lock->name);
+		__entry->lockdep_addr = lock;
 	),
 
-	TP_printk("%s", __get_str(name))
+	TP_printk("%p %s",
+		  __entry->lockdep_addr, __get_str(name))
 );
 
 #ifdef CONFIG_LOCK_STAT
@@ -59,13 +65,16 @@ TRACE_EVENT(lock_contended,
 
 	TP_STRUCT__entry(
 		__string(name, lock->name)
+		__field(void *, lockdep_addr)
 	),
 
 	TP_fast_assign(
 		__assign_str(name, lock->name);
+		__entry->lockdep_addr = lock;
 	),
 
-	TP_printk("%s", __get_str(name))
+	TP_printk("%p %s",
+		  __entry->lockdep_addr, __get_str(name))
 );
 
 TRACE_EVENT(lock_acquired,
@@ -75,16 +84,18 @@ TRACE_EVENT(lock_acquired,
 
 	TP_STRUCT__entry(
 		__string(name, lock->name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
+		__field(s64, wait_nsec)
+		__field(void *, lockdep_addr)
 	),
+
 	TP_fast_assign(
 		__assign_str(name, lock->name);
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
+		__entry->wait_nsec = waittime;
+		__entry->lockdep_addr = lock;
 	),
-	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
-				       __entry->wait_nsec_rem)
+	TP_printk("%p %s (%llu ns)", __entry->lockdep_addr,
+		  __get_str(name),
+		  __entry->wait_nsec)
 );
 
 #endif

From 9b5e350c7a46a471d5b452836dbafe9aeaeca435 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Sat, 30 Jan 2010 20:43:33 +0900
Subject: [PATCH 168/640] perf lock: Introduce new tool "perf lock", for
 analyzing lock statistics

Adding new subcommand "perf lock" to perf.

I have a lot of remaining ToDos, but for now perf lock can
already provide minimal functionality for analyzing lock
statistics.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264851813-8413-12-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile       |   1 +
 tools/perf/builtin-lock.c | 724 ++++++++++++++++++++++++++++++++++++++
 tools/perf/builtin.h      |   1 +
 tools/perf/perf.c         |   1 +
 4 files changed, 727 insertions(+)
 create mode 100644 tools/perf/builtin-lock.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index b2bce1fb4ae1..42969303e20b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -456,6 +456,7 @@ BUILTIN_OBJS += builtin-top.o
 BUILTIN_OBJS += builtin-trace.o
 BUILTIN_OBJS += builtin-probe.o
 BUILTIN_OBJS += builtin-kmem.o
+BUILTIN_OBJS += builtin-lock.o
 
 PERFLIBS = $(LIB_FILE)
 
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
new file mode 100644
index 000000000000..2b5f88754c26
--- /dev/null
+++ b/tools/perf/builtin-lock.c
@@ -0,0 +1,724 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/session.h"
+
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+#include <limits.h>
+
+#include <linux/list.h>
+#include <linux/hash.h>
+
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS		12
+#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
+
+static struct list_head lockhash_table[LOCKHASH_SIZE];
+
+#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
+
+#define LOCK_STATE_UNLOCKED 0	       /* initial state */
+#define LOCK_STATE_LOCKED 1
+
+struct lock_stat {
+	struct list_head hash_entry;
+	struct rb_node rb;	/* used for sorting */
+
+	/* FIXME: raw_field_value() returns unsigned long long,
+	 * so address of lockdep_map should be dealed as 64bit.
+	 * Is there more better solution? */
+	void *addr;	       /* address of lockdep_map, used as ID */
+	char *name;	       /* for strcpy(), we cannot use const */
+	char *file;
+	unsigned int line;
+
+	int state;
+	u64 prev_event_time;	/* timestamp of previous event */
+
+	unsigned int nr_acquired;
+	unsigned int nr_acquire;
+	unsigned int nr_contended;
+	unsigned int nr_release;
+
+	/* these times are in nano sec. */
+	u64 wait_time_total;
+	u64 wait_time_min;
+	u64 wait_time_max;
+};
+
+/* build simple key function one is bigger than two */
+#define SINGLE_KEY(member)					\
+	static int lock_stat_key_ ## member(struct lock_stat *one,	\
+					 struct lock_stat *two)		\
+	{								\
+		return one->member > two->member;			\
+	}
+
+SINGLE_KEY(nr_acquired)
+SINGLE_KEY(nr_contended)
+SINGLE_KEY(wait_time_total)
+SINGLE_KEY(wait_time_min)
+SINGLE_KEY(wait_time_max)
+
+struct lock_key {
+	/*
+	 * name: the value for specify by user
+	 * this should be simpler than raw name of member
+	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
+	 */
+	const char *name;
+	int (*key)(struct lock_stat*, struct lock_stat*);
+};
+
+static const char *sort_key = "acquired";
+static int (*compare)(struct lock_stat *, struct lock_stat *);
+
+#define DEF_KEY_LOCK(name, fn_suffix)	\
+	{ #name, lock_stat_key_ ## fn_suffix }
+struct lock_key keys[] = {
+	DEF_KEY_LOCK(acquired, nr_acquired),
+	DEF_KEY_LOCK(contended, nr_contended),
+	DEF_KEY_LOCK(wait_total, wait_time_total),
+	DEF_KEY_LOCK(wait_min, wait_time_min),
+	DEF_KEY_LOCK(wait_max, wait_time_max),
+
+	/* extra comparisons much complicated should be here */
+
+	{ NULL, NULL }
+};
+
+static void select_key(void)
+{
+	int i;
+
+	for (i = 0; keys[i].name; i++) {
+		if (!strcmp(keys[i].name, sort_key)) {
+			compare = keys[i].key;
+			return;
+		}
+	}
+
+	die("Unknown compare key:%s\n", sort_key);
+}
+
+static struct rb_root result;	/* place to store sorted data */
+
+static void insert_to_result(struct lock_stat *st,
+			     int (*bigger)(struct lock_stat *,
+					   struct lock_stat *))
+{
+	struct rb_node **rb = &result.rb_node;
+	struct rb_node *parent = NULL;
+	struct lock_stat *p;
+
+	while (*rb) {
+		p = container_of(*rb, struct lock_stat, rb);
+		parent = *rb;
+
+		if (bigger(st, p))
+			rb = &(*rb)->rb_left;
+		else
+			rb = &(*rb)->rb_right;
+	}
+
+	rb_link_node(&st->rb, parent, rb);
+	rb_insert_color(&st->rb, &result);
+}
+
+/* returns left most element of result, and erase it */
+static struct lock_stat *pop_from_result(void)
+{
+	struct rb_node *node = result.rb_node;
+
+	if (!node)
+		return NULL;
+
+	while (node->rb_left)
+		node = node->rb_left;
+
+	rb_erase(node, &result);
+	return container_of(node, struct lock_stat, rb);
+}
+
+static struct lock_stat *lock_stat_findnew(void *addr, const char *name,
+					   const char *file, unsigned int line)
+{
+	struct list_head *entry = lockhashentry(addr);
+	struct lock_stat *ret, *new;
+
+	list_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+
+	new = zalloc(sizeof(struct lock_stat));
+	if (!new)
+		goto alloc_failed;
+
+	new->addr = addr;
+	new->name = zalloc(sizeof(char) * strlen(name) + 1);
+	if (!new->name)
+		goto alloc_failed;
+	strcpy(new->name, name);
+	new->file = zalloc(sizeof(char) * strlen(file) + 1);
+	if (!new->file)
+		goto alloc_failed;
+	strcpy(new->file, file);
+	new->line = line;
+
+	/* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
+	new->state = LOCK_STATE_UNLOCKED;
+	new->wait_time_min = ULLONG_MAX;
+
+	list_add(&new->hash_entry, entry);
+	return new;
+
+alloc_failed:
+	die("memory allocation failed\n");
+}
+
+static char			const *input_name = "perf.data";
+
+static int			profile_cpu = -1;
+
+struct raw_event_sample {
+	u32 size;
+	char data[0];
+};
+
+struct trace_acquire_event {
+	void *addr;
+	const char *name;
+	const char *file;
+	unsigned int line;
+};
+
+struct trace_acquired_event {
+	void *addr;
+	const char *name;
+	const char *file;
+	unsigned int line;
+};
+
+struct trace_contended_event {
+	void *addr;
+	const char *name;
+	const char *file;
+	unsigned int line;
+};
+
+struct trace_release_event {
+	void *addr;
+	const char *name;
+	const char *file;
+	unsigned int line;
+};
+
+struct trace_lock_handler {
+	void (*acquire_event)(struct trace_acquire_event *,
+			      struct event *,
+			      int cpu,
+			      u64 timestamp,
+			      struct thread *thread);
+
+	void (*acquired_event)(struct trace_acquired_event *,
+			       struct event *,
+			       int cpu,
+			       u64 timestamp,
+			       struct thread *thread);
+
+	void (*contended_event)(struct trace_contended_event *,
+				struct event *,
+				int cpu,
+				u64 timestamp,
+				struct thread *thread);
+
+	void (*release_event)(struct trace_release_event *,
+			      struct event *,
+			      int cpu,
+			      u64 timestamp,
+			      struct thread *thread);
+};
+
+static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event,
+			struct event *__event __used,
+			int cpu __used,
+			u64 timestamp,
+			struct thread *thread __used)
+{
+	struct lock_stat *st;
+
+	st = lock_stat_findnew(acquire_event->addr, acquire_event->name,
+			       acquire_event->file, acquire_event->line);
+
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
+		break;
+	case LOCK_STATE_LOCKED:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	st->prev_event_time = timestamp;
+}
+
+static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event,
+			 struct event *__event __used,
+			 int cpu __used,
+			 u64 timestamp,
+			 struct thread *thread __used)
+{
+	struct lock_stat *st;
+
+	st = lock_stat_findnew(acquired_event->addr, acquired_event->name,
+			       acquired_event->file, acquired_event->line);
+
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
+		st->state = LOCK_STATE_LOCKED;
+		st->nr_acquired++;
+		break;
+	case LOCK_STATE_LOCKED:
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	st->prev_event_time = timestamp;
+}
+
+static void prof_lock_contended_event(struct trace_contended_event *contended_event,
+			  struct event *__event __used,
+			  int cpu __used,
+			  u64 timestamp,
+			  struct thread *thread __used)
+{
+	struct lock_stat *st;
+
+	st = lock_stat_findnew(contended_event->addr, contended_event->name,
+			       contended_event->file, contended_event->line);
+
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
+		break;
+	case LOCK_STATE_LOCKED:
+		st->nr_contended++;
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	st->prev_event_time = timestamp;
+}
+
+static void prof_lock_release_event(struct trace_release_event *release_event,
+			struct event *__event __used,
+			int cpu __used,
+			u64 timestamp,
+			struct thread *thread __used)
+{
+	struct lock_stat *st;
+	u64 hold_time;
+
+	st = lock_stat_findnew(release_event->addr, release_event->name,
+			       release_event->file, release_event->line);
+
+	switch (st->state) {
+	case LOCK_STATE_UNLOCKED:
+		break;
+	case LOCK_STATE_LOCKED:
+		st->state = LOCK_STATE_UNLOCKED;
+		hold_time = timestamp - st->prev_event_time;
+
+		if (timestamp < st->prev_event_time) {
+			/* terribly, this can happen... */
+			goto end;
+		}
+
+		if (st->wait_time_min > hold_time)
+			st->wait_time_min = hold_time;
+		if (st->wait_time_max < hold_time)
+			st->wait_time_max = hold_time;
+		st->wait_time_total += hold_time;
+
+		st->nr_release++;
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+end:
+	st->prev_event_time = timestamp;
+}
+
+/* lock oriented handlers */
+/* TODO: handlers for CPU oriented, thread oriented */
+static struct trace_lock_handler prof_lock_ops  = {
+	.acquire_event		= prof_lock_acquire_event,
+	.acquired_event		= prof_lock_acquired_event,
+	.contended_event	= prof_lock_contended_event,
+	.release_event		= prof_lock_release_event,
+};
+
+static struct trace_lock_handler *trace_handler;
+
+static void
+process_lock_acquire_event(void *data,
+			   struct event *event __used,
+			   int cpu __used,
+			   u64 timestamp __used,
+			   struct thread *thread __used)
+{
+	struct trace_acquire_event acquire_event;
+	u64 tmp;		/* this is required for casting... */
+
+	tmp = raw_field_value(event, "lockdep_addr", data);
+	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
+	acquire_event.name = (char *)raw_field_ptr(event, "name", data);
+	acquire_event.file = (char *)raw_field_ptr(event, "file", data);
+	acquire_event.line =
+		(unsigned int)raw_field_value(event, "line", data);
+
+	if (trace_handler->acquire_event) {
+		trace_handler->acquire_event(&acquire_event,
+					     event, cpu, timestamp, thread);
+	}
+}
+
+static void
+process_lock_acquired_event(void *data,
+			    struct event *event __used,
+			    int cpu __used,
+			    u64 timestamp __used,
+			    struct thread *thread __used)
+{
+	struct trace_acquired_event acquired_event;
+	u64 tmp;		/* this is required for casting... */
+
+	tmp = raw_field_value(event, "lockdep_addr", data);
+	memcpy(&acquired_event.addr, &tmp, sizeof(void *));
+	acquired_event.name = (char *)raw_field_ptr(event, "name", data);
+	acquired_event.file = (char *)raw_field_ptr(event, "file", data);
+	acquired_event.line =
+		(unsigned int)raw_field_value(event, "line", data);
+
+	if (trace_handler->acquire_event) {
+		trace_handler->acquired_event(&acquired_event,
+					     event, cpu, timestamp, thread);
+	}
+}
+
+static void
+process_lock_contended_event(void *data,
+			     struct event *event __used,
+			     int cpu __used,
+			     u64 timestamp __used,
+			     struct thread *thread __used)
+{
+	struct trace_contended_event contended_event;
+	u64 tmp;		/* this is required for casting... */
+
+	tmp = raw_field_value(event, "lockdep_addr", data);
+	memcpy(&contended_event.addr, &tmp, sizeof(void *));
+	contended_event.name = (char *)raw_field_ptr(event, "name", data);
+	contended_event.file = (char *)raw_field_ptr(event, "file", data);
+	contended_event.line =
+		(unsigned int)raw_field_value(event, "line", data);
+
+	if (trace_handler->acquire_event) {
+		trace_handler->contended_event(&contended_event,
+					     event, cpu, timestamp, thread);
+	}
+}
+
+static void
+process_lock_release_event(void *data,
+			   struct event *event __used,
+			   int cpu __used,
+			   u64 timestamp __used,
+			   struct thread *thread __used)
+{
+	struct trace_release_event release_event;
+	u64 tmp;		/* this is required for casting... */
+
+	tmp = raw_field_value(event, "lockdep_addr", data);
+	memcpy(&release_event.addr, &tmp, sizeof(void *));
+	release_event.name = (char *)raw_field_ptr(event, "name", data);
+	release_event.file = (char *)raw_field_ptr(event, "file", data);
+	release_event.line =
+		(unsigned int)raw_field_value(event, "line", data);
+
+	if (trace_handler->acquire_event) {
+		trace_handler->release_event(&release_event,
+					     event, cpu, timestamp, thread);
+	}
+}
+
+static void
+process_raw_event(void *data, int cpu,
+		  u64 timestamp, struct thread *thread)
+{
+	struct event *event;
+	int type;
+
+	type = trace_parse_common_type(data);
+	event = trace_find_event(type);
+
+	if (!strcmp(event->name, "lock_acquire"))
+		process_lock_acquire_event(data, event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "lock_acquired"))
+		process_lock_acquired_event(data, event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "lock_contended"))
+		process_lock_contended_event(data, event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "lock_release"))
+		process_lock_release_event(data, event, cpu, timestamp, thread);
+}
+
+static int process_sample_event(event_t *event, struct perf_session *session)
+{
+	struct thread *thread;
+	struct sample_data data;
+
+	bzero(&data, sizeof(struct sample_data));
+	event__parse_sample(event, session->sample_type, &data);
+	thread = perf_session__findnew(session, data.pid);
+
+	/*
+	 * FIXME: this causes warn on 32bit environment
+	 * because of (void *)data.ip (type of data.ip is u64)
+	 */
+/* 	dump_printf("(IP, %d): %d/%d: %p period: %llu\n", */
+/* 		    event->header.misc, */
+/* 		    data.pid, data.tid, (void *)data.ip, data.period); */
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
+		return 0;
+
+	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+
+	return 0;
+}
+
+/* TODO: various way to print, coloring, nano or milli sec */
+static void print_result(void)
+{
+	struct lock_stat *st;
+	char cut_name[20];
+
+	printf("%18s ", "ID");
+	printf("%20s ", "Name");
+	printf("%10s ", "acquired");
+	printf("%10s ", "contended");
+
+	printf("%15s ", "total wait (ns)");
+	printf("%15s ", "max wait (ns)");
+	printf("%15s ", "min wait (ns)");
+
+	printf("\n\n");
+
+	while ((st = pop_from_result())) {
+		bzero(cut_name, 20);
+
+		printf("%p ", st->addr);
+
+		if (strlen(st->name) < 16) {
+			/* output raw name */
+			printf("%20s ", st->name);
+		} else {
+			strncpy(cut_name, st->name, 16);
+			cut_name[16] = '.';
+			cut_name[17] = '.';
+			cut_name[18] = '.';
+			cut_name[19] = '\0';
+			/* cut off name for saving output style */
+			printf("%20s ", cut_name);
+		}
+
+		printf("%10u ", st->nr_acquired);
+		printf("%10u ", st->nr_contended);
+
+		printf("%15llu ", st->wait_time_total);
+		printf("%15llu ", st->wait_time_max);
+		printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
+		       0 : st->wait_time_min);
+		printf("\n");
+	}
+}
+
+static void dump_map(void)
+{
+	unsigned int i;
+	struct lock_stat *st;
+
+	for (i = 0; i < LOCKHASH_SIZE; i++) {
+		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+			printf("%p: %s (src: %s, line: %u)\n",
+			       st->addr, st->name, st->file, st->line);
+		}
+	}
+}
+
+static struct perf_event_ops eops = {
+	.sample	= process_sample_event,
+	.comm	= event__process_comm,
+};
+
+static struct perf_session *session;
+
+static int read_events(void)
+{
+	session = perf_session__new(input_name, O_RDONLY, 0);
+	if (!session)
+		die("Initializing perf session failed\n");
+
+	return perf_session__process_events(session, &eops);
+}
+
+static void sort_result(void)
+{
+	unsigned int i;
+	struct lock_stat *st;
+
+	for (i = 0; i < LOCKHASH_SIZE; i++) {
+		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+			insert_to_result(st, compare);
+		}
+	}
+}
+
+static void __cmd_prof(void)
+{
+	setup_pager();
+	select_key();
+	read_events();
+	sort_result();
+	print_result();
+}
+
+static const char * const prof_usage[] = {
+	"perf sched prof [<options>]",
+	NULL
+};
+
+static const struct option prof_options[] = {
+	OPT_STRING('k', "key", &sort_key, "acquired",
+		    "key for sorting"),
+	/* TODO: type */
+	OPT_END()
+};
+
+static const char * const lock_usage[] = {
+	"perf lock [<options>] {record|trace|prof}",
+	NULL
+};
+
+static const struct option lock_options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_END()
+};
+
+static const char *record_args[] = {
+	"record",
+	"-a",
+	"-R",
+	"-M",
+	"-f",
+	"-m", "1024",
+	"-c", "1",
+	"-e", "lock:lock_acquire:r",
+	"-e", "lock:lock_acquired:r",
+	"-e", "lock:lock_contended:r",
+	"-e", "lock:lock_release:r",
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = strdup(record_args[i]);
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	BUG_ON(i != rec_argc);
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+int cmd_lock(int argc, const char **argv, const char *prefix __used)
+{
+	unsigned int i;
+
+	symbol__init();
+	for (i = 0; i < LOCKHASH_SIZE; i++)
+		INIT_LIST_HEAD(lockhash_table + i);
+
+	argc = parse_options(argc, argv, lock_options, lock_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc)
+		usage_with_options(lock_usage, lock_options);
+
+	if (!strncmp(argv[0], "rec", 3)) {
+		return __cmd_record(argc, argv);
+	} else if (!strncmp(argv[0], "prof", 4)) {
+		trace_handler = &prof_lock_ops;
+		if (argc) {
+			argc = parse_options(argc, argv,
+					     prof_options, prof_usage, 0);
+			if (argc)
+				usage_with_options(prof_usage, prof_options);
+		}
+		__cmd_prof();
+	} else if (!strcmp(argv[0], "trace")) {
+		/* Aliased to 'perf trace' */
+		return cmd_trace(argc, argv, prefix);
+	} else if (!strcmp(argv[0], "map")) {
+		/* recycling prof_lock_ops */
+		trace_handler = &prof_lock_ops;
+		setup_pager();
+		read_events();
+		dump_map();
+	} else {
+		usage_with_options(lock_usage, lock_options);
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index dee97cfe3794..10fe49e7048a 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -31,5 +31,6 @@ extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
+extern int cmd_lock(int argc, const char **argv, const char *prefix);
 
 #endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 109b89b30ced..57cb107c1f13 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -303,6 +303,7 @@ static void handle_internal_command(int argc, const char **argv)
 		{ "sched",	cmd_sched,	0 },
 		{ "probe",	cmd_probe,	0 },
 		{ "kmem",	cmd_kmem,	0 },
+		{ "lock",	cmd_lock,	0 },
 	};
 	unsigned int i;
 	static const char ext[] = STRIP_EXTENSION;

From 59f411b62c9282891274e721fea29026b0eda3cc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 31 Jan 2010 08:27:58 +0100
Subject: [PATCH 169/640] perf lock: Clean up various details

Fix up a few small stylistic details:

 - use consistent vertical spacing/alignment
 - remove line80 artifacts
 - group some global variables better
 - remove dead code

Plus rename 'prof' to 'report' to make it more in line with other
tools, and remove the line/file keying as we really want to use
IPs like the other tools do.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264851813-8413-12-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-lock.c | 210 +++++++++++++++-----------------------
 1 file changed, 82 insertions(+), 128 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2b5f88754c26..fb9ab2ad3f92 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -32,37 +32,37 @@ static struct list_head lockhash_table[LOCKHASH_SIZE];
 #define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
 #define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
 
-#define LOCK_STATE_UNLOCKED 0	       /* initial state */
-#define LOCK_STATE_LOCKED 1
+#define LOCK_STATE_UNLOCKED	0	       /* initial state */
+#define LOCK_STATE_LOCKED	1
 
 struct lock_stat {
-	struct list_head hash_entry;
-	struct rb_node rb;	/* used for sorting */
+	struct list_head	hash_entry;
+	struct rb_node		rb;		/* used for sorting */
 
-	/* FIXME: raw_field_value() returns unsigned long long,
+	/*
+	 * FIXME: raw_field_value() returns unsigned long long,
 	 * so address of lockdep_map should be dealed as 64bit.
-	 * Is there more better solution? */
-	void *addr;	       /* address of lockdep_map, used as ID */
-	char *name;	       /* for strcpy(), we cannot use const */
-	char *file;
-	unsigned int line;
+	 * Is there more better solution?
+	 */
+	void			*addr;		/* address of lockdep_map, used as ID */
+	char			*name;		/* for strcpy(), we cannot use const */
 
-	int state;
-	u64 prev_event_time;	/* timestamp of previous event */
+	int			state;
+	u64			prev_event_time; /* timestamp of previous event */
 
-	unsigned int nr_acquired;
-	unsigned int nr_acquire;
-	unsigned int nr_contended;
-	unsigned int nr_release;
+	unsigned int		nr_acquired;
+	unsigned int		nr_acquire;
+	unsigned int		nr_contended;
+	unsigned int		nr_release;
 
 	/* these times are in nano sec. */
-	u64 wait_time_total;
-	u64 wait_time_min;
-	u64 wait_time_max;
+	u64			wait_time_total;
+	u64			wait_time_min;
+	u64			wait_time_max;
 };
 
 /* build simple key function one is bigger than two */
-#define SINGLE_KEY(member)					\
+#define SINGLE_KEY(member)						\
 	static int lock_stat_key_ ## member(struct lock_stat *one,	\
 					 struct lock_stat *two)		\
 	{								\
@@ -81,12 +81,15 @@ struct lock_key {
 	 * this should be simpler than raw name of member
 	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
 	 */
-	const char *name;
-	int (*key)(struct lock_stat*, struct lock_stat*);
+	const char		*name;
+	int			(*key)(struct lock_stat*, struct lock_stat*);
 };
 
-static const char *sort_key = "acquired";
-static int (*compare)(struct lock_stat *, struct lock_stat *);
+static const char		*sort_key = "acquired";
+
+static int			(*compare)(struct lock_stat *, struct lock_stat *);
+
+static struct rb_root		result;	/* place to store sorted data */
 
 #define DEF_KEY_LOCK(name, fn_suffix)	\
 	{ #name, lock_stat_key_ ## fn_suffix }
@@ -116,11 +119,8 @@ static void select_key(void)
 	die("Unknown compare key:%s\n", sort_key);
 }
 
-static struct rb_root result;	/* place to store sorted data */
-
 static void insert_to_result(struct lock_stat *st,
-			     int (*bigger)(struct lock_stat *,
-					   struct lock_stat *))
+			     int (*bigger)(struct lock_stat *, struct lock_stat *))
 {
 	struct rb_node **rb = &result.rb_node;
 	struct rb_node *parent = NULL;
@@ -155,8 +155,7 @@ static struct lock_stat *pop_from_result(void)
 	return container_of(node, struct lock_stat, rb);
 }
 
-static struct lock_stat *lock_stat_findnew(void *addr, const char *name,
-					   const char *file, unsigned int line)
+static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
 {
 	struct list_head *entry = lockhashentry(addr);
 	struct lock_stat *ret, *new;
@@ -175,11 +174,6 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name,
 	if (!new->name)
 		goto alloc_failed;
 	strcpy(new->name, name);
-	new->file = zalloc(sizeof(char) * strlen(file) + 1);
-	if (!new->file)
-		goto alloc_failed;
-	strcpy(new->file, file);
-	new->line = line;
 
 	/* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
 	new->state = LOCK_STATE_UNLOCKED;
@@ -197,36 +191,28 @@ static char			const *input_name = "perf.data";
 static int			profile_cpu = -1;
 
 struct raw_event_sample {
-	u32 size;
-	char data[0];
+	u32			size;
+	char			data[0];
 };
 
 struct trace_acquire_event {
-	void *addr;
-	const char *name;
-	const char *file;
-	unsigned int line;
+	void			*addr;
+	const char		*name;
 };
 
 struct trace_acquired_event {
-	void *addr;
-	const char *name;
-	const char *file;
-	unsigned int line;
+	void			*addr;
+	const char		*name;
 };
 
 struct trace_contended_event {
-	void *addr;
-	const char *name;
-	const char *file;
-	unsigned int line;
+	void			*addr;
+	const char		*name;
 };
 
 struct trace_release_event {
-	void *addr;
-	const char *name;
-	const char *file;
-	unsigned int line;
+	void			*addr;
+	const char		*name;
 };
 
 struct trace_lock_handler {
@@ -255,7 +241,8 @@ struct trace_lock_handler {
 			      struct thread *thread);
 };
 
-static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event,
+static void
+report_lock_acquire_event(struct trace_acquire_event *acquire_event,
 			struct event *__event __used,
 			int cpu __used,
 			u64 timestamp,
@@ -263,8 +250,7 @@ static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event,
 {
 	struct lock_stat *st;
 
-	st = lock_stat_findnew(acquire_event->addr, acquire_event->name,
-			       acquire_event->file, acquire_event->line);
+	st = lock_stat_findnew(acquire_event->addr, acquire_event->name);
 
 	switch (st->state) {
 	case LOCK_STATE_UNLOCKED:
@@ -279,7 +265,8 @@ static void prof_lock_acquire_event(struct trace_acquire_event *acquire_event,
 	st->prev_event_time = timestamp;
 }
 
-static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event,
+static void
+report_lock_acquired_event(struct trace_acquired_event *acquired_event,
 			 struct event *__event __used,
 			 int cpu __used,
 			 u64 timestamp,
@@ -287,8 +274,7 @@ static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event
 {
 	struct lock_stat *st;
 
-	st = lock_stat_findnew(acquired_event->addr, acquired_event->name,
-			       acquired_event->file, acquired_event->line);
+	st = lock_stat_findnew(acquired_event->addr, acquired_event->name);
 
 	switch (st->state) {
 	case LOCK_STATE_UNLOCKED:
@@ -305,7 +291,8 @@ static void prof_lock_acquired_event(struct trace_acquired_event *acquired_event
 	st->prev_event_time = timestamp;
 }
 
-static void prof_lock_contended_event(struct trace_contended_event *contended_event,
+static void
+report_lock_contended_event(struct trace_contended_event *contended_event,
 			  struct event *__event __used,
 			  int cpu __used,
 			  u64 timestamp,
@@ -313,8 +300,7 @@ static void prof_lock_contended_event(struct trace_contended_event *contended_ev
 {
 	struct lock_stat *st;
 
-	st = lock_stat_findnew(contended_event->addr, contended_event->name,
-			       contended_event->file, contended_event->line);
+	st = lock_stat_findnew(contended_event->addr, contended_event->name);
 
 	switch (st->state) {
 	case LOCK_STATE_UNLOCKED:
@@ -330,7 +316,8 @@ static void prof_lock_contended_event(struct trace_contended_event *contended_ev
 	st->prev_event_time = timestamp;
 }
 
-static void prof_lock_release_event(struct trace_release_event *release_event,
+static void
+report_lock_release_event(struct trace_release_event *release_event,
 			struct event *__event __used,
 			int cpu __used,
 			u64 timestamp,
@@ -339,8 +326,7 @@ static void prof_lock_release_event(struct trace_release_event *release_event,
 	struct lock_stat *st;
 	u64 hold_time;
 
-	st = lock_stat_findnew(release_event->addr, release_event->name,
-			       release_event->file, release_event->line);
+	st = lock_stat_findnew(release_event->addr, release_event->name);
 
 	switch (st->state) {
 	case LOCK_STATE_UNLOCKED:
@@ -373,11 +359,11 @@ end:
 
 /* lock oriented handlers */
 /* TODO: handlers for CPU oriented, thread oriented */
-static struct trace_lock_handler prof_lock_ops  = {
-	.acquire_event		= prof_lock_acquire_event,
-	.acquired_event		= prof_lock_acquired_event,
-	.contended_event	= prof_lock_contended_event,
-	.release_event		= prof_lock_release_event,
+static struct trace_lock_handler report_lock_ops  = {
+	.acquire_event		= report_lock_acquire_event,
+	.acquired_event		= report_lock_acquired_event,
+	.contended_event	= report_lock_contended_event,
+	.release_event		= report_lock_release_event,
 };
 
 static struct trace_lock_handler *trace_handler;
@@ -395,14 +381,9 @@ process_lock_acquire_event(void *data,
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
 	acquire_event.name = (char *)raw_field_ptr(event, "name", data);
-	acquire_event.file = (char *)raw_field_ptr(event, "file", data);
-	acquire_event.line =
-		(unsigned int)raw_field_value(event, "line", data);
 
-	if (trace_handler->acquire_event) {
-		trace_handler->acquire_event(&acquire_event,
-					     event, cpu, timestamp, thread);
-	}
+	if (trace_handler->acquire_event)
+		trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -418,14 +399,9 @@ process_lock_acquired_event(void *data,
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&acquired_event.addr, &tmp, sizeof(void *));
 	acquired_event.name = (char *)raw_field_ptr(event, "name", data);
-	acquired_event.file = (char *)raw_field_ptr(event, "file", data);
-	acquired_event.line =
-		(unsigned int)raw_field_value(event, "line", data);
 
-	if (trace_handler->acquire_event) {
-		trace_handler->acquired_event(&acquired_event,
-					     event, cpu, timestamp, thread);
-	}
+	if (trace_handler->acquire_event)
+		trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -441,14 +417,9 @@ process_lock_contended_event(void *data,
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&contended_event.addr, &tmp, sizeof(void *));
 	contended_event.name = (char *)raw_field_ptr(event, "name", data);
-	contended_event.file = (char *)raw_field_ptr(event, "file", data);
-	contended_event.line =
-		(unsigned int)raw_field_value(event, "line", data);
 
-	if (trace_handler->acquire_event) {
-		trace_handler->contended_event(&contended_event,
-					     event, cpu, timestamp, thread);
-	}
+	if (trace_handler->acquire_event)
+		trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -464,14 +435,9 @@ process_lock_release_event(void *data,
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&release_event.addr, &tmp, sizeof(void *));
 	release_event.name = (char *)raw_field_ptr(event, "name", data);
-	release_event.file = (char *)raw_field_ptr(event, "file", data);
-	release_event.line =
-		(unsigned int)raw_field_value(event, "line", data);
 
-	if (trace_handler->acquire_event) {
-		trace_handler->release_event(&release_event,
-					     event, cpu, timestamp, thread);
-	}
+	if (trace_handler->acquire_event)
+		trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -503,14 +469,6 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	event__parse_sample(event, session->sample_type, &data);
 	thread = perf_session__findnew(session, data.pid);
 
-	/*
-	 * FIXME: this causes warn on 32bit environment
-	 * because of (void *)data.ip (type of data.ip is u64)
-	 */
-/* 	dump_printf("(IP, %d): %d/%d: %p period: %llu\n", */
-/* 		    event->header.misc, */
-/* 		    data.pid, data.tid, (void *)data.ip, data.period); */
-
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -580,15 +538,14 @@ static void dump_map(void)
 
 	for (i = 0; i < LOCKHASH_SIZE; i++) {
 		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
-			printf("%p: %s (src: %s, line: %u)\n",
-			       st->addr, st->name, st->file, st->line);
+			printf("%p: %s\n", st->addr, st->name);
 		}
 	}
 }
 
 static struct perf_event_ops eops = {
-	.sample	= process_sample_event,
-	.comm	= event__process_comm,
+	.sample			= process_sample_event,
+	.comm			= event__process_comm,
 };
 
 static struct perf_session *session;
@@ -614,7 +571,7 @@ static void sort_result(void)
 	}
 }
 
-static void __cmd_prof(void)
+static void __cmd_report(void)
 {
 	setup_pager();
 	select_key();
@@ -623,12 +580,12 @@ static void __cmd_prof(void)
 	print_result();
 }
 
-static const char * const prof_usage[] = {
-	"perf sched prof [<options>]",
+static const char * const report_usage[] = {
+	"perf lock report [<options>]",
 	NULL
 };
 
-static const struct option prof_options[] = {
+static const struct option report_options[] = {
 	OPT_STRING('k', "key", &sort_key, "acquired",
 		    "key for sorting"),
 	/* TODO: type */
@@ -636,17 +593,14 @@ static const struct option prof_options[] = {
 };
 
 static const char * const lock_usage[] = {
-	"perf lock [<options>] {record|trace|prof}",
+	"perf lock [<options>] {record|trace|report}",
 	NULL
 };
 
 static const struct option lock_options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
-		    "input file name"),
-	OPT_BOOLEAN('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
 	OPT_END()
 };
 
@@ -698,21 +652,21 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
 
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
-	} else if (!strncmp(argv[0], "prof", 4)) {
-		trace_handler = &prof_lock_ops;
+	} else if (!strncmp(argv[0], "report", 6)) {
+		trace_handler = &report_lock_ops;
 		if (argc) {
 			argc = parse_options(argc, argv,
-					     prof_options, prof_usage, 0);
+					     report_options, report_usage, 0);
 			if (argc)
-				usage_with_options(prof_usage, prof_options);
+				usage_with_options(report_usage, report_options);
 		}
-		__cmd_prof();
+		__cmd_report();
 	} else if (!strcmp(argv[0], "trace")) {
 		/* Aliased to 'perf trace' */
 		return cmd_trace(argc, argv, prefix);
 	} else if (!strcmp(argv[0], "map")) {
-		/* recycling prof_lock_ops */
-		trace_handler = &prof_lock_ops;
+		/* recycling report_lock_ops */
+		trace_handler = &report_lock_ops;
 		setup_pager();
 		read_events();
 		dump_map();

From d2f6650a950dadd20667a04a9dc785f240d43695 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 29 Jan 2010 17:48:51 +0100
Subject: [PATCH 170/640] ACPI: Add NULL pointer check in acpi_bus_start

If acpi_bus_add does not return a device and it's passed
to acpi_bus_start, bad things will happen:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
IP: [<ffffffff8128402d>] acpi_bus_start+0x14/0x24
...
[<ffffffffa008977a>] acpiphp_bus_add+0xba/0x130 [acpiphp]
[<ffffffffa008aa72>] enable_device+0x132/0x2ff [acpiphp]
[<ffffffffa0089b68>] acpiphp_enable_slot+0xb8/0x130 [acpiphp]
[<ffffffffa0089df7>] handle_hotplug_event_func+0x87/0x190 [acpiphp]

Next patch would make this NULL pointer check obsolete, but
better having one more than one missing...

Signed-off-by: Thomas Renninger <trenn@suse.de>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
CC: stable@kernel.org
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/scan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index ff9f6226085d..8044583f3034 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1357,6 +1357,9 @@ int acpi_bus_start(struct acpi_device *device)
 {
 	struct acpi_bus_ops ops;
 
+	if (!device)
+		return -EINVAL;
+
 	memset(&ops, 0, sizeof(ops));
 	ops.acpi_op_start = 1;
 

From 7779688fc3d1ceddad84846a7b0affbe8e78ec6e Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 29 Jan 2010 17:48:52 +0100
Subject: [PATCH 171/640] ACPI: acpi_bus_{scan,bus,add}: return -ENODEV if no
 device was found

Callers (acpi_memhotplug.c, dock.c and others) check for the return
value of acpi_bus_add() and assume a valid device was returned in
case zero was returned.

Thus return -ENODEV if no device was found in acpi_bus_scan and
propagate this through acpi_bus_add and acpi_bus_start.

Also remove a confusing comment in acpiphp_glue.c, acpi_bus_scan
will and cannot invoke if acpi_bus_add returns no valid device.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/scan.c                | 24 +++++++++++++++++++-----
 drivers/pci/hotplug/acpiphp_glue.c |  6 ------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 8044583f3034..3e009674f333 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1336,9 +1336,25 @@ static int acpi_bus_scan(acpi_handle handle, struct acpi_bus_ops *ops,
 
 	if (child)
 		*child = device;
-	return 0;
+
+	if (device)
+		return 0;
+	else
+		return -ENODEV;
 }
 
+/*
+ * acpi_bus_add and acpi_bus_start
+ *
+ * scan a given ACPI tree and (probably recently hot-plugged)
+ * create and add or starts found devices.
+ *
+ * If no devices were found -ENODEV is returned which does not
+ * mean that this is a real error, there just have been no suitable
+ * ACPI objects in the table trunk from which the kernel could create
+ * a device and add/start an appropriate driver.
+ */
+
 int
 acpi_bus_add(struct acpi_device **child,
 	     struct acpi_device *parent, acpi_handle handle, int type)
@@ -1348,8 +1364,7 @@ acpi_bus_add(struct acpi_device **child,
 	memset(&ops, 0, sizeof(ops));
 	ops.acpi_op_add = 1;
 
-	acpi_bus_scan(handle, &ops, child);
-	return 0;
+	return acpi_bus_scan(handle, &ops, child);
 }
 EXPORT_SYMBOL(acpi_bus_add);
 
@@ -1363,8 +1378,7 @@ int acpi_bus_start(struct acpi_device *device)
 	memset(&ops, 0, sizeof(ops));
 	ops.acpi_op_start = 1;
 
-	acpi_bus_scan(device->handle, &ops, NULL);
-	return 0;
+	return acpi_bus_scan(device->handle, &ops, NULL);
 }
 EXPORT_SYMBOL(acpi_bus_start);
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 8e952fdab764..cb2fd01eddae 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -720,12 +720,6 @@ static int acpiphp_bus_add(struct acpiphp_func *func)
 			-ret_val);
 		goto acpiphp_bus_add_out;
 	}
-	/*
-	 * try to start anyway.  We could have failed to add
-	 * simply because this bus had previously been added
-	 * on another add.  Don't bother with the return value
-	 * we just keep going.
-	 */
 	ret_val = acpi_bus_start(device);
 
 acpiphp_bus_add_out:

From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 27 Jan 2010 16:25:22 -0600
Subject: [PATCH 172/640] softlockup: Add sched_clock_tick() to avoid kernel
 warning on kgdb resume

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets
the time from hardware such as the TSC on x86. In this
configuration kgdb will report a softlock warning message on
resuming or detaching from a debug session.

Sequence of events in the problem case:

 1) "cpu sched clock" and "hardware time" are at 100 sec prior
    to a call to kgdb_handle_exception()

 2) Debugger waits in kgdb_handle_exception() for 80 sec and on
    exit the following is called ...  touch_softlockup_watchdog() -->
    __raw_get_cpu_var(touch_timestamp) = 0;

 3) "cpu sched clock" = 100s (it was not updated, because the
    interrupt was disabled in kgdb) but the "hardware time" = 180 sec

 4) The first timer interrupt after resuming from
    kgdb_handle_exception updates the watchdog from the "cpu sched clock"

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> check (touch_timestamp == 0) (it is "YES"
here, we have set "touch_timestamp = 0" at kgdb) -->
__touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp"
to "get_timestamp()" (Here, the "touch_timestamp" will still be
set to 100s.)  ...

    scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched
    clock" to "hardware time" = 180s) ...  }

 5) The Second timer interrupt handler appears to have a large
    jump and trips the softlockup warning.

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> "cpu sched clock" - "touch_timestamp" =
180s-100s > 60s --> printk "soft lockup error messages" ...  }

note: ***(A) reset "touch_timestamp" to
"get_timestamp(this_cpu)"

Why is "touch_timestamp" 100 sec, instead of 180 sec?

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of
get_timestamp() is:

get_timestamp(this_cpu)
 -->cpu_clock(this_cpu)
 -->sched_clock_cpu(this_cpu)
 -->__update_sched_clock(sched_clock_data, now)

The __update_sched_clock() function uses the GTOD tick value to
create a window to normalize the "now" values.  So if "now"
value is too big for sched_clock_data, it will be ignored.

The fix is to invoke sched_clock_tick() to update "cpu sched
clock" in order to recover from this state.  This is done by
introducing the function touch_softlockup_watchdog_sync(). This
allows kgdb to request that the sched clock is updated when the
watchdog thread runs the first time after a resume from kgdb.

[yong.zhang0@gmail.com: Use per cpu instead of an array]
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Dongdong Deng <Dongdong.Deng@windriver.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: peterz@infradead.org
LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/kgdb.c         |  6 +++---
 kernel/softlockup.c   | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..89232151a9d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
 				    void __user *buffer,
@@ -323,6 +324,9 @@ static inline void softlockup_tick(void)
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..87f2cc557553 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs)
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1450,7 +1450,7 @@ acquirelock:
 	    (kgdb_info[cpu].task &&
 	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
 		atomic_set(&kgdb_active, -1);
-		touch_softlockup_watchdog();
+		touch_softlockup_watchdog_sync();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1550,7 +1550,7 @@ kgdb_restore:
 	}
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
 
 static int __read_mostly did_panic;
 int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlock_touch_sync) = true;
+	__raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
+
 void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
 	}
 
 	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlock_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
 		__touch_softlockup_watchdog();
 		return;
 	}

From dbead405801c8d5aa1bc21ab6e2a47f060d47c06 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 1 Feb 2010 18:50:40 +0100
Subject: [PATCH 173/640] ARM: 5909/1: ARM: Correct the FPSCR bits setting when
 raising exceptions

Commit c98929c07a removed the clearing of the FPSCR[31:28] bits from the
vfp_raise_exceptions() function and the new bits are or'ed with the old
FPSCR bits leading to unexpected results (the original commit was
referring to the cumulative bits - FPSCR[4:0]).

Reported-by: Tom Hameenanttila <tmhameen@marvell.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vfp/vfpmodule.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index f60a5400a25b..a63c4be99b36 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -197,10 +197,13 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 	}
 
 	/*
-	 * Update the FPSCR with the additional exception flags.
+	 * If any of the status flags are set, update the FPSCR.
 	 * Comparison instructions always return at least one of
 	 * these flags set.
 	 */
+	if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
+		fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
+
 	fpscr |= exceptions;
 
 	fmxr(FPSCR, fpscr);

From 110f82d7a2e0ff5a17617a9672f1ccb7e44bc0c6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Mon, 18 Jan 2010 22:36:49 +0100
Subject: [PATCH 174/640] firewire: net: fix panic in fwnet_write_complete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the transmit path of firewire-net (IPv4 over 1394), the following
race condition may occur:
  - The networking soft IRQ inserts a datagram into the 1394 async
    request transmit DMA.
  - The 1394 async transmit completion tasklet runs to finish cleaning
    up (unlink datagram from list of pending ones, release skb and
    outbound 1394 transaction object) --- before the networking soft IRQ
    had a chance to proceed and add the datagram to the list of pending
    datagrams.

This caused a panic in the 1394 async transmit completion tasklet when
it dereferenced unitialized list heads:
http://bugzilla.kernel.org/show_bug.cgi?id=15077

The fix is to add checks in the tx soft IRQ and in the tasklet to
determine which of these two is the last referrer to the transaction
object.  Then handle the cleanup of the object by the last referrer
rather than assuming that the tasklet is always the last one.

There is another similar race:  Between said tasklet and fwnet_close,
i.e. at ifdown.  However, that race is much less likely to occur in
practice and shall be fixed in a separate update.

Reported-by: Илья Басин <basinilya@gmail.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/net.c | 53 +++++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index cbaf420c36c5..2d3dc7ded0a9 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -893,20 +893,31 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context,
 
 static struct kmem_cache *fwnet_packet_task_cache;
 
+static void fwnet_free_ptask(struct fwnet_packet_task *ptask)
+{
+	dev_kfree_skb_any(ptask->skb);
+	kmem_cache_free(fwnet_packet_task_cache, ptask);
+}
+
 static int fwnet_send_packet(struct fwnet_packet_task *ptask);
 
 static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 {
-	struct fwnet_device *dev;
+	struct fwnet_device *dev = ptask->dev;
 	unsigned long flags;
-
-	dev = ptask->dev;
+	bool free;
 
 	spin_lock_irqsave(&dev->lock, flags);
-	list_del(&ptask->pt_link);
-	spin_unlock_irqrestore(&dev->lock, flags);
 
-	ptask->outstanding_pkts--; /* FIXME access inside lock */
+	ptask->outstanding_pkts--;
+
+	/* Check whether we or the networking TX soft-IRQ is last user. */
+	free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link));
+
+	if (ptask->outstanding_pkts == 0)
+		list_del(&ptask->pt_link);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
 
 	if (ptask->outstanding_pkts > 0) {
 		u16 dg_size;
@@ -951,10 +962,10 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 			ptask->max_payload = skb->len + RFC2374_FRAG_HDR_SIZE;
 		}
 		fwnet_send_packet(ptask);
-	} else {
-		dev_kfree_skb_any(ptask->skb);
-		kmem_cache_free(fwnet_packet_task_cache, ptask);
 	}
+
+	if (free)
+		fwnet_free_ptask(ptask);
 }
 
 static void fwnet_write_complete(struct fw_card *card, int rcode,
@@ -977,6 +988,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 	unsigned tx_len;
 	struct rfc2734_header *bufhdr;
 	unsigned long flags;
+	bool free;
 
 	dev = ptask->dev;
 	tx_len = ptask->max_payload;
@@ -1022,12 +1034,16 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 				generation, SCODE_100, 0ULL, ptask->skb->data,
 				tx_len + 8, fwnet_write_complete, ptask);
 
-		/* FIXME race? */
 		spin_lock_irqsave(&dev->lock, flags);
-		list_add_tail(&ptask->pt_link, &dev->broadcasted_list);
+
+		/* If the AT tasklet already ran, we may be last user. */
+		free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link));
+		if (!free)
+			list_add_tail(&ptask->pt_link, &dev->broadcasted_list);
+
 		spin_unlock_irqrestore(&dev->lock, flags);
 
-		return 0;
+		goto out;
 	}
 
 	fw_send_request(dev->card, &ptask->transaction,
@@ -1035,12 +1051,19 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 			ptask->generation, ptask->speed, ptask->fifo_addr,
 			ptask->skb->data, tx_len, fwnet_write_complete, ptask);
 
-	/* FIXME race? */
 	spin_lock_irqsave(&dev->lock, flags);
-	list_add_tail(&ptask->pt_link, &dev->sent_list);
+
+	/* If the AT tasklet already ran, we may be last user. */
+	free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link));
+	if (!free)
+		list_add_tail(&ptask->pt_link, &dev->sent_list);
+
 	spin_unlock_irqrestore(&dev->lock, flags);
 
 	dev->netdev->trans_start = jiffies;
+ out:
+	if (free)
+		fwnet_free_ptask(ptask);
 
 	return 0;
 }
@@ -1298,6 +1321,8 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	spin_unlock_irqrestore(&dev->lock, flags);
 
 	ptask->max_payload = max_payload;
+	INIT_LIST_HEAD(&ptask->pt_link);
+
 	fwnet_send_packet(ptask);
 
 	return NETDEV_TX_OK;

From ba9e9f3c08a5b58c1ffacf0cc6fb703ab0fa55ff Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 1 Feb 2010 21:23:46 -0200
Subject: [PATCH 175/640] saa7146: stop DMA before de-allocating DMA
 scatter/gather page buffers

Thanks-to: Hartmut <e9hack@googlemail.com> for pointing me the problem
and testing the fix.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/saa7146_video.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index becbaadb3b77..5ed75263340a 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -1333,9 +1333,9 @@ static void buffer_release(struct videobuf_queue *q, struct videobuf_buffer *vb)
 
 	DEB_CAP(("vbuf:%p\n",vb));
 
-	release_all_pagetables(dev, buf);
-
 	saa7146_dma_free(dev,q,buf);
+
+	release_all_pagetables(dev, buf);
 }
 
 static struct videobuf_queue_ops video_qops = {

From 4f48f8b7fd18c44f8478174f9925cc3c059c6ce4 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 2 Feb 2010 15:32:09 +0800
Subject: [PATCH 176/640] tracing: Fix circular dead lock in stack trace

When we cat <debugfs>/tracing/stack_trace, we may cause circular lock:
sys_read()
  t_start()
     arch_spin_lock(&max_stack_lock);

  t_show()
     seq_printf(), vsnprintf() .... /* they are all trace-able,
       when they are traced, max_stack_lock may be required again. */

The following script can trigger this circular dead lock very easy:
#!/bin/bash

echo 1 > /proc/sys/kernel/stack_tracer_enabled

mount -t debugfs xxx /mnt > /dev/null 2>&1

(
# make check_stack() zealous to require max_stack_lock
for ((; ;))
{
	echo 1 > /mnt/tracing/stack_max_size
}
) &

for ((; ;))
{
	cat /mnt/tracing/stack_trace > /dev/null
}

To fix this bug, we increase the percpu trace_active before
require the lock.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B67D4F9.9080905@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_stack.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 	unsigned long val, flags;
 	char buf[64];
 	int ret;
+	int cpu;
 
 	if (count >= sizeof(buf))
 		return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 		return ret;
 
 	local_irq_save(flags);
+
+	/*
+	 * In case we trace inside arch_spin_lock() or after (NMI),
+	 * we will cause circular lock, so we also need to increase
+	 * the percpu trace_active here.
+	 */
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)++;
+
 	arch_spin_lock(&max_stack_lock);
 	*ptr = val;
 	arch_spin_unlock(&max_stack_lock);
+
+	per_cpu(trace_active, cpu)--;
 	local_irq_restore(flags);
 
 	return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+	int cpu;
+
 	local_irq_disable();
+
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)++;
+
 	arch_spin_lock(&max_stack_lock);
 
 	if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 
 static void t_stop(struct seq_file *m, void *p)
 {
+	int cpu;
+
 	arch_spin_unlock(&max_stack_lock);
+
+	cpu = smp_processor_id();
+	per_cpu(trace_active, cpu)--;
+
 	local_irq_enable();
 }
 

From adef477268ff5ddd0195611dc7e26d7a879fefe1 Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Tue, 26 Jan 2010 10:26:06 +0100
Subject: [PATCH 177/640] dmaengine: fix memleak in dma_async_device_unregister

While debugging a dma driver I noticed a memleak after
unloading the driver module.

Caught by kmemleak.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 6f51a0a7a8bb..e7a3230fb7d5 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -826,6 +826,7 @@ void dma_async_device_unregister(struct dma_device *device)
 		chan->dev->chan = NULL;
 		mutex_unlock(&dma_list_mutex);
 		device_unregister(&chan->dev->device);
+		free_percpu(chan->local);
 	}
 }
 EXPORT_SYMBOL(dma_async_device_unregister);

From ab09809f2eee1dc2d8f8bea636e77d176ba6c648 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <ext-andriy.shevchenko@nokia.com>
Date: Tue, 2 Feb 2010 14:38:12 -0800
Subject: [PATCH 178/640] x86, doc: Fix minor spelling error in
 arch/x86/mm/gup.c

Fix minor spelling error in comment.  No code change.

Signed-off-by: Andy Shevchenko <ext-andriy.shevchenko@nokia.com>
LKML-Reference: <201002022238.o12McDiF018720@imap1.linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/gup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking
-	 * any locks.  For this we would like to load the pointers atoimcally,
+	 * any locks.  For this we would like to load the pointers atomically,
 	 * but that is not possible (without expensive cmpxchg8b) on PAE.  What
 	 * we do have is the guarantee that a pte will only either go from not
 	 * present to present, or present to not present or both -- it will not

From d622b89a2f58613a9c1407b22b02aecdd2187a7c Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sat, 30 Jan 2010 23:32:19 +0800
Subject: [PATCH 179/640] ocfs2: Fix memory overflow in cow_by_page.

In ocfs2_duplicate_clusters_by_page, we calculate map_end
by shifting page_index. But actually in case we meet with
a large offset(say in a i686 box, poff_t is only 32 bits
and page_index=2056240), we will overflow. So change the
type of page_index to loff_t.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/refcounttree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 74db2be75dd6..5b64468de0b0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2945,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 
 	while (offset < end) {
 		page_index = offset >> PAGE_CACHE_SHIFT;
-		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
 		if (map_end > end)
 			map_end = end;
 
@@ -3170,7 +3170,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
 
 	while (offset < end) {
 		page_index = offset >> PAGE_CACHE_SHIFT;
-		map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
 		if (map_end > end)
 			map_end = end;
 

From 0a1ea437d87af830786605813972e8e277992917 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 1 Feb 2010 17:05:33 +0800
Subject: [PATCH 180/640] ocfs2: Only bug out when page size is larger than
 cluster size.

In CoW, we have to make sure that the page is already written
out to the disk. So we have a BUG_ON(PageDirty(page)).

In ppc platform we have pagesize=64K, so if the cs=4K, if the
file have fragmented clusters, we will map the page many times.
See this file as an example.
Tree Depth: 0   Count: 19   Next Free Rec: 14
	## Offset        Clusters       Block#          Flags
	0  0             4              2164864         0x2 Refcounted
	1  4             2              9302792         0x2 Refcounted
...

We have to replace the extent recs one by one, so the page with index 0
will be mapped and dirtied twice.

I'd like to leave the BUG_ON there while adding a check so that in
case we meet with an error in other platforms, we can find it easily.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/refcounttree.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5b64468de0b0..8ae65c9c020c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2957,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 
 		page = grab_cache_page(mapping, page_index);
 
-		/* This page can't be dirtied before we CoW it out. */
-		BUG_ON(PageDirty(page));
+		/*
+		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
+		 * can't be dirtied before we CoW it out.
+		 */
+		if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
+			BUG_ON(PageDirty(page));
 
 		if (!PageUptodate(page)) {
 			ret = block_read_full_page(page, ocfs2_get_block);

From 60c486744c9a30ea60fa863e9587242dde2fe4bd Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 3 Feb 2010 09:56:04 +0800
Subject: [PATCH 181/640] ocfs2: Add parenthesis to wrap the check for
 O_DIRECT.

Add parenthesis to wrap the check for O_DIRECT.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 65e9375d2fb3..558ce0312421 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2013,8 +2013,8 @@ out_dio:
 	/* buffered aio wouldn't have proper lock coverage today */
 	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
-	if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
-	    (file->f_flags & O_DIRECT && has_refcount)) {
+	if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
+	    ((file->f_flags & O_DIRECT) && has_refcount)) {
 		ret = filemap_fdatawrite_range(file->f_mapping, pos,
 					       pos + count - 1);
 		if (ret < 0)

From cd34edd8cf80b507bb84b3f0c2988fe05099ffb5 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 25 Jan 2010 17:58:30 -0800
Subject: [PATCH 182/640] ocfs2/dlm: Handle EAGAIN for compatibility - v2

Mainline commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8 made the
dlm_begin_reco_handler() return -EAGAIN instead of EAGAIN.

As this error is transmitted over the wire, we want the receiver,
dlm_send_begin_reco_message(), to understand both the older EAGAIN and
the newer -EAGAIN, to allow rolling upgrade of the cluster nodes.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index cfb2ae9ab538..ad712211d4ea 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2639,7 +2639,13 @@ retry:
 			     "begin reco msg (%d)\n", dlm->name, nodenum, ret);
 			ret = 0;
 		}
-		if (ret == -EAGAIN) {
+
+		/*
+		 * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
+		 * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
+		 * We are handling both for compatibility reasons.
+		 */
+		if (ret == -EAGAIN || ret == EAGAIN) {
 			mlog(0, "%s: trying to start recovery of node "
 			     "%u, but node %u is waiting for last recovery "
 			     "to complete, backoff for a bit\n", dlm->name,

From 34e6c59af06cbca07b1490ec0015ea2d303470d3 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 27 Jan 2010 10:21:52 +0800
Subject: [PATCH 183/640] ocfs2: Use compat_ptr in reflink_arguments.

Although we use u64 to pass userspace pointers to the kernel
to avoid compat_ioctl, it doesn't work in some ppc platform.
So wrap them with compat_ptr and add compat_ioctl.

The detailed discussion about compat_ptr can be found in thread
http://lkml.org/lkml/2009/10/27/423.

We indeed met with a bug when testing on ppc(-EFAULT is returned
when using old_path). This patch try to fix this.
I have tested in ppc64(with 32 bit reflink) and x86_64(with i686
reflink), both works.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ioctl.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 31fbb0619510..7d9d9c132cef 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/compat.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_COMPAT
 long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
+	bool preserve;
+	struct reflink_arguments args;
+	struct inode *inode = file->f_path.dentry->d_inode;
+
 	switch (cmd) {
 	case OCFS2_IOC32_GETFLAGS:
 		cmd = OCFS2_IOC_GETFLAGS;
@@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
-	case OCFS2_IOC_REFLINK:
 		break;
+	case OCFS2_IOC_REFLINK:
+		if (copy_from_user(&args, (struct reflink_arguments *)arg,
+				   sizeof(args)))
+			return -EFAULT;
+		preserve = (args.preserve != 0);
+
+		return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
+					   compat_ptr(args.new_path), preserve);
 	default:
 		return -ENOIOCTLCMD;
 	}

From 7e55a70c5b9a57c12f49c44b0847c9343d4f54e4 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 13 Jan 2010 13:33:12 -0700
Subject: [PATCH 184/640] ioat: fix infinite timeout checking in ioat2_quiesce

Fix typo in ioat2_quiesce. check 'tmo' is zero, not 'end'.  Also applies
to 2.6.32.3

Cc: <stable@kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ioat/dma_v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 5f7a500e18d0..5cc37afe2bc1 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -249,7 +249,7 @@ int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo)
 	if (is_ioat_active(status) || is_ioat_idle(status))
 		ioat_suspend(chan);
 	while (is_ioat_active(status) || is_ioat_idle(status)) {
-		if (end && time_after(jiffies, end)) {
+		if (tmo && time_after(jiffies, end)) {
 			err = -ETIMEDOUT;
 			break;
 		}

From 0b94a909eb2e2f6990d05fd486a0cb4902ef1ae7 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Thu, 21 Jan 2010 10:50:02 -0800
Subject: [PATCH 185/640] ocfs2: Fix setting of OCFS2_LOCK_BLOCKED during bast

During bast, set the OCFS2_LOCK_BLOCKED flag only if the lock needs to
downconverted.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Acked-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 172f4c6ce1be..0cdf63042b76 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -907,8 +907,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
 
 	assert_spin_locked(&lockres->l_lock);
 
-	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
-
 	if (level > lockres->l_blocking) {
 		/* only schedule a downconvert if we haven't already scheduled
 		 * one that goes low enough to satisfy the level we're
@@ -921,6 +919,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
 		lockres->l_blocking = level;
 	}
 
+	if (needs_downconvert)
+		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
+
 	mlog_exit(needs_downconvert);
 	return needs_downconvert;
 }

From a19128260107f951d1b4c421cf98b92f8092b069 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 21 Jan 2010 10:50:03 -0800
Subject: [PATCH 186/640] ocfs2: Prevent a livelock in dlmglue

There is possibility of a livelock in __ocfs2_cluster_lock(). If a node were
to get an ast for an upconvert request, followed immediately by a bast,
there is a small window where the fs may downconvert the lock before the
process requesting the upconvert is able to take the lock.

This patch adds a new flag to indicate that the upconvert is still in
progress and that the dc thread should not downconvert it right now.

Wengang Wang <wen.gang.wang@oracle.com> and Joel Becker
<joel.becker@oracle.com> contributed heavily to this patch.

Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 49 +++++++++++++++++++++++++++++++++++++++++++---
 fs/ocfs2/ocfs2.h   |  4 ++++
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 0cdf63042b76..85d7c490755b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
 
 	lockres->l_level = lockres->l_requested;
+
+	/*
+	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
+	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
+	 * downconverting the lock before the upconvert has fully completed.
+	 */
+	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
 
 	mlog_exit_void();
@@ -1134,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 	mlog_entry_void();
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
+	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
 	if (convert)
 		lockres->l_action = OCFS2_AST_INVALID;
 	else
@@ -1324,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 again:
 	wait = 0;
 
+	spin_lock_irqsave(&lockres->l_lock, flags);
+
 	if (catch_signals && signal_pending(current)) {
 		ret = -ERESTARTSYS;
-		goto out;
+		goto unlock;
 	}
 
-	spin_lock_irqsave(&lockres->l_lock, flags);
-
 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
 			"Cluster lock called on freeing lockres %s! flags "
 			"0x%lx\n", lockres->l_name, lockres->l_flags);
@@ -1347,6 +1356,25 @@ again:
 		goto unlock;
 	}
 
+	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
+		/*
+		 * We've upconverted. If the lock now has a level we can
+		 * work with, we take it. If, however, the lock is not at the
+		 * required level, we go thru the full cycle. One way this could
+		 * happen is if a process requesting an upconvert to PR is
+		 * closely followed by another requesting upconvert to an EX.
+		 * If the process requesting EX lands here, we want it to
+		 * continue attempting to upconvert and let the process
+		 * requesting PR take the lock.
+		 * If multiple processes request upconvert to PR, the first one
+		 * here will take the lock. The others will have to go thru the
+		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
+		 * downconvert request.
+		 */
+		if (level <= lockres->l_level)
+			goto update_holders;
+	}
+
 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
 		/* is the lock is currently blocked on behalf of
@@ -1417,11 +1445,14 @@ again:
 		goto again;
 	}
 
+update_holders:
 	/* Ok, if we get here then we're good to go. */
 	ocfs2_inc_holders(lockres, level);
 
 	ret = 0;
 unlock:
+	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 out:
 	/*
@@ -3402,6 +3433,18 @@ recheck:
 		goto leave;
 	}
 
+	/*
+	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
+	 * set when the ast is received for an upconvert just before the
+	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
+	 * on the heels of the ast, we want to delay the downconvert just
+	 * enough to allow the up requestor to do its task. Because this
+	 * lock is in the blocked queue, the lock will be downconverted
+	 * as soon as the requestor is done with the lock.
+	 */
+	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
+		goto leave_requeue;
+
 	/* if we're blocking an exclusive and we have *any* holders,
 	 * then requeue. */
 	if ((lockres->l_blocking == DLM_LOCK_EX)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9362eea7424b..740f448041e2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
 #define OCFS2_LOCK_PENDING       (0x00000400) /* This lockres is pending a
 						 call to dlm_lock.  Only
 						 exists with BUSY set. */
+#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
+						     * from downconverting
+						     * before the upconvert
+						     * has completed */
 
 struct ocfs2_lock_res_ops;
 

From 0d74125a6a68d4f1969ecaf0b3543f315916ccdc Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 29 Jan 2010 09:44:11 -0800
Subject: [PATCH 187/640] ocfs2: Do not downconvert if the lock level is
 already compatible

During upconvert, if the master were to send a BAST, dlmglue will detect the
upconversion in process and send a cancel convert to the master. Upon receiving
the AST for the cancel convert, it will re-process the lock resource to determine
whether it needs downconverting. Say, the up was from PR to EX and the BAST was
for EX. After the cancel convert, it will need to downconvert to NL.

However, if the node was originally upconverting from NL to EX, then there would
be no reason to downconvert (assuming the same message sequence).

This patch makes dlmglue consider the possibility that the current lock level
is already compatible and that downconverting is not required.

Joel Becker <joel.becker@oracle.com> assisted in fixing this issue.

Fixes ossbz#1178
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1178

Reported-by: Coly Li <coly.li@suse.de>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 85d7c490755b..ac24f49ae2fb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3445,6 +3445,19 @@ recheck:
 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
 		goto leave_requeue;
 
+	/*
+	 * How can we block and yet be at NL?  We were trying to upconvert
+	 * from NL and got canceled.  The code comes back here, and now
+	 * we notice and clear BLOCKING.
+	 */
+	if (lockres->l_level == DLM_LOCK_NL) {
+		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
+		lockres->l_blocking = DLM_LOCK_NL;
+		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
+		spin_unlock_irqrestore(&lockres->l_lock, flags);
+		goto leave;
+	}
+
 	/* if we're blocking an exclusive and we have *any* holders,
 	 * then requeue. */
 	if ((lockres->l_blocking == DLM_LOCK_EX)

From db0f6ce69776370232431eb8be85a5b18b0019c0 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 1 Feb 2010 16:55:50 -0800
Subject: [PATCH 188/640] ocfs2: Remove overzealous BUG_ON during blocked lock
 processing

During blocked lock processing, we should consider the possibility that the
lock is no longer blocking.

Joel Becker <joel.becker@oracle.com> assisted in fixing this issue.

Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ac24f49ae2fb..ce8e061c9a22 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3392,9 +3392,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 
-	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
-
 recheck:
+	/*
+	 * Is it still blocking? If not, we have no more work to do.
+	 */
+	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
+		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
+		spin_unlock_irqrestore(&lockres->l_lock, flags);
+		ret = 0;
+		goto leave;
+	}
+
 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
 		/* XXX
 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag

From b8f46c5a34fa64fd456295388d18f50ae69d9f37 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 3 Feb 2010 11:53:14 +0800
Subject: [PATCH 189/640] perf tools: Use O_LARGEFILE to open perf data file

Open perf data file with O_LARGEFILE flag since its size is
easily larger that 2G.

For example:

 # rm -rf perf.data
 # ./perf kmem record sleep 300

 [ perf record: Woken up 0 times to write data ]
 [ perf record: Captured and wrote 3142.147 MB perf.data
 (~137282513 samples) ]

 # ll -h perf.data
 -rw------- 1 root root 3.1G .....

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B68F32A.9040203@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c        |  5 ++++-
 tools/perf/util/header.c           | 22 +++++++++++++---------
 tools/perf/util/session.c          |  5 ++++-
 tools/perf/util/trace-event-read.c |  4 ++--
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index eea56910b91c..949167efa1ed 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -5,6 +5,9 @@
  * (or a CPU, or a PID) into the perf.data output file - for
  * later analysis via perf report.
  */
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
 #include "builtin.h"
 
 #include "perf.h"
@@ -451,7 +454,7 @@ static int __cmd_record(int argc, const char **argv)
 		append_file = 0;
 	}
 
-	flags = O_CREAT|O_RDWR;
+	flags = O_CREAT|O_RDWR|O_LARGEFILE;
 	if (append_file)
 		file_new = 0;
 	else
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 2bb2bdb1f456..ed3efd728b41 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,3 +1,6 @@
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
 #include <sys/types.h>
 #include <byteswap.h>
 #include <unistd.h>
@@ -382,7 +385,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
 	sec_start = self->data_offset + self->data_size;
-	lseek(fd, sec_start + sec_size, SEEK_SET);
+	lseek64(fd, sec_start + sec_size, SEEK_SET);
 
 	if (perf_header__has_feat(self, HEADER_TRACE_INFO)) {
 		struct perf_file_section *trace_sec;
@@ -390,9 +393,9 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		trace_sec = &feat_sec[idx++];
 
 		/* Write trace info */
-		trace_sec->offset = lseek(fd, 0, SEEK_CUR);
+		trace_sec->offset = lseek64(fd, 0, SEEK_CUR);
 		read_tracing_data(fd, attrs, nr_counters);
-		trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
+		trace_sec->size = lseek64(fd, 0, SEEK_CUR) - trace_sec->offset;
 	}
 
 
@@ -402,17 +405,18 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		buildid_sec = &feat_sec[idx++];
 
 		/* Write build-ids */
-		buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
+		buildid_sec->offset = lseek64(fd, 0, SEEK_CUR);
 		err = dsos__write_buildid_table(fd);
 		if (err < 0) {
 			pr_debug("failed to write buildid table\n");
 			goto out_free;
 		}
-		buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
+		buildid_sec->size = lseek64(fd, 0, SEEK_CUR) -
+					    buildid_sec->offset;
 		dsos__cache_build_ids();
 	}
 
-	lseek(fd, sec_start, SEEK_SET);
+	lseek64(fd, sec_start, SEEK_SET);
 	err = do_write(fd, feat_sec, sec_size);
 	if (err < 0)
 		pr_debug("failed to write feature section\n");
@@ -506,7 +510,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 		pr_debug("failed to write perf header\n");
 		return err;
 	}
-	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+	lseek64(fd, self->data_offset + self->data_size, SEEK_SET);
 
 	self->frozen = 1;
 	return 0;
@@ -560,7 +564,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
-	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+	lseek64(fd, self->data_offset + self->data_size, SEEK_SET);
 
 	if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
 		goto out_free;
@@ -634,7 +638,7 @@ static int perf_file_section__process(struct perf_file_section *self,
 				      struct perf_header *ph,
 				      int feat, int fd)
 {
-	if (lseek(fd, self->offset, SEEK_SET) < 0) {
+	if (lseek64(fd, self->offset, SEEK_SET) < 0) {
 		pr_debug("Failed to lseek to %Ld offset for feature %d, "
 			 "continuing...\n", self->offset, feat);
 		return 0;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8e7c1896eaa2..cf91d099f0aa 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,3 +1,6 @@
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
 #include <linux/kernel.h>
 
 #include <byteswap.h>
@@ -12,7 +15,7 @@ static int perf_session__open(struct perf_session *self, bool force)
 {
 	struct stat input_stat;
 
-	self->fd = open(self->filename, O_RDONLY);
+	self->fd = open(self->filename, O_RDONLY|O_LARGEFILE);
 	if (self->fd < 0) {
 		pr_err("failed to open file: %s", self->filename);
 		if (!strcmp(self->filename, "perf.data"))
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 1744422cafcb..ca3c26d466f3 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -83,7 +83,7 @@ static char *read_string(void)
 	char *str = NULL;
 	int size = 0;
 	int i;
-	int r;
+	s64 r;
 
 	for (;;) {
 		r = read(input_fd, buf, BUFSIZ);
@@ -117,7 +117,7 @@ static char *read_string(void)
 	i++;
 
 	/* move the file descriptor to the end of the string */
-	r = lseek(input_fd, -(r - i), SEEK_CUR);
+	r = lseek64(input_fd, -(r - i), SEEK_CUR);
 	if (r < 0)
 		die("lseek");
 

From 7823860ca2904d6325eb636b77768f3e8183c861 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Wed, 3 Feb 2010 10:18:20 +0100
Subject: [PATCH 190/640] microblaze: Defconfig update

There were several changes in Microblaze defconfig that's why
is good to update defconfigs.

Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/configs/mmu_defconfig   | 112 ++++++++++++++++++------
 arch/microblaze/configs/nommu_defconfig | 101 +++++++++++++++++----
 2 files changed, 169 insertions(+), 44 deletions(-)

diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index bb7c374713ad..6fced1fe3bf0 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31
-# Thu Sep 24 10:28:50 2009
+# Linux kernel version: 2.6.33-rc6
+# Wed Feb  3 10:02:59 2010
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -19,8 +19,12 @@ CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_GPIO=y
 CONFIG_GENERIC_CSUM=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 # CONFIG_PCI is not set
 CONFIG_NO_DMA=y
+CONFIG_DTC=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 CONFIG_CONSTRUCTORS=y
 
@@ -44,6 +48,7 @@ CONFIG_SYSVIPC_SYSCTL=y
 #
 CONFIG_TREE_RCU=y
 # CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_TINY_RCU is not set
 # CONFIG_RCU_TRACE is not set
 CONFIG_RCU_FANOUT=32
 # CONFIG_RCU_FANOUT_EXACT is not set
@@ -64,10 +69,12 @@ CONFIG_INITRAMFS_ROOT_GID=0
 CONFIG_RD_GZIP=y
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
 # CONFIG_INITRAMFS_COMPRESSION_NONE is not set
 CONFIG_INITRAMFS_COMPRESSION_GZIP=y
 # CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set
 # CONFIG_INITRAMFS_COMPRESSION_LZMA is not set
+# CONFIG_INITRAMFS_COMPRESSION_LZO is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -90,21 +97,20 @@ CONFIG_EVENTFD=y
 CONFIG_AIO=y
 
 #
-# Performance Counters
+# Kernel Performance Events And Counters
 #
 CONFIG_VM_EVENT_COUNTERS=y
-# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
-# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
 
 #
 # GCOV-based kernel profiling
 #
-# CONFIG_SLOW_WORK is not set
+CONFIG_SLOW_WORK=y
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_BASE_SMALL=1
@@ -123,14 +129,41 @@ CONFIG_LBDAF=y
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
 # CONFIG_DEFAULT_DEADLINE is not set
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_INLINE_SPIN_TRYLOCK is not set
+# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK is not set
+# CONFIG_INLINE_SPIN_LOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_SPIN_UNLOCK is not set
+# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
+# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_READ_TRYLOCK is not set
+# CONFIG_INLINE_READ_LOCK is not set
+# CONFIG_INLINE_READ_LOCK_BH is not set
+# CONFIG_INLINE_READ_LOCK_IRQ is not set
+# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_READ_UNLOCK is not set
+# CONFIG_INLINE_READ_UNLOCK_BH is not set
+# CONFIG_INLINE_READ_UNLOCK_IRQ is not set
+# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_WRITE_TRYLOCK is not set
+# CONFIG_INLINE_WRITE_LOCK is not set
+# CONFIG_INLINE_WRITE_LOCK_BH is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_WRITE_UNLOCK is not set
+# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
+# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
+# CONFIG_MUTEX_SPIN_ON_OWNER is not set
 # CONFIG_FREEZER is not set
 
 #
@@ -139,11 +172,6 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 CONFIG_PLATFORM_GENERIC=y
 CONFIG_OPT_LIB_FUNCTION=y
 CONFIG_OPT_LIB_ASM=y
-CONFIG_ALLOW_EDIT_AUTO=y
-
-#
-# Automatic platform settings from Kconfig.auto
-#
 
 #
 # Definitions for MICROBLAZE0
@@ -203,12 +231,11 @@ CONFIG_FLATMEM_MANUAL=y
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_SPLIT_PTLOCK_CPUS=999999
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
-CONFIG_HAVE_MLOCK=y
-CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+# CONFIG_KSM is not set
 CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
 
 #
@@ -289,7 +316,13 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_WIRELESS is not set
+CONFIG_WIRELESS=y
+# CONFIG_CFG80211 is not set
+# CONFIG_LIB80211 is not set
+
+#
+# CFG80211 needs to be enabled for MAC80211
+#
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
@@ -313,6 +346,10 @@ CONFIG_OF_DEVICE=y
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
+
+#
+# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
+#
 # CONFIG_BLK_DEV_NBD is not set
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
@@ -349,7 +386,6 @@ CONFIG_NETDEVICES=y
 # CONFIG_PHYLIB is not set
 CONFIG_NET_ETHERNET=y
 # CONFIG_MII is not set
-# CONFIG_ETHOC is not set
 # CONFIG_DNET is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
@@ -359,12 +395,12 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+# CONFIG_KS8851_MLL is not set
 CONFIG_XILINX_EMACLITE=y
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
 CONFIG_WLAN=y
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
+# CONFIG_HOSTAP is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -408,6 +444,7 @@ CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -433,7 +470,6 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
 # CONFIG_THERMAL is not set
-# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
 
 #
@@ -526,8 +562,6 @@ CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
 CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
 CONFIG_MISC_FILESYSTEMS=y
@@ -638,11 +672,13 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 #
 # Kernel hacking
 #
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 # CONFIG_PRINTK_TIME is not set
 CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=1024
 # CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_UNUSED_SYMBOLS is not set
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
@@ -662,6 +698,9 @@ CONFIG_DEBUG_SLAB=y
 # CONFIG_DEBUG_SLAB_LEAK is not set
 CONFIG_DEBUG_SPINLOCK=y
 # CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
@@ -680,10 +719,29 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
 # CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
+CONFIG_FTRACE=y
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_ENABLE_DEFAULT_TRACERS is not set
+# CONFIG_BOOT_TRACER is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+# CONFIG_PROFILE_ALL_BRANCHES is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_SAMPLES is not set
-# CONFIG_KMEMCHECK is not set
 CONFIG_EARLY_PRINTK=y
 # CONFIG_HEART_BEAT is not set
 CONFIG_DEBUG_BOOTMEM=y
@@ -694,7 +752,11 @@ CONFIG_DEBUG_BOOTMEM=y
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
 # CONFIG_SECURITYFS is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
 CONFIG_CRYPTO=y
 
 #
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index adb839bab704..ce2da535246a 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31
-# Thu Sep 24 10:29:43 2009
+# Linux kernel version: 2.6.33-rc6
+# Wed Feb  3 10:03:21 2010
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -19,8 +19,12 @@ CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_GPIO=y
 CONFIG_GENERIC_CSUM=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 # CONFIG_PCI is not set
 CONFIG_NO_DMA=y
+CONFIG_DTC=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 CONFIG_CONSTRUCTORS=y
 
@@ -46,6 +50,7 @@ CONFIG_BSD_PROCESS_ACCT_V3=y
 #
 CONFIG_TREE_RCU=y
 # CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_TINY_RCU is not set
 # CONFIG_RCU_TRACE is not set
 CONFIG_RCU_FANOUT=32
 # CONFIG_RCU_FANOUT_EXACT is not set
@@ -81,16 +86,16 @@ CONFIG_EVENTFD=y
 CONFIG_AIO=y
 
 #
-# Performance Counters
+# Kernel Performance Events And Counters
 #
 CONFIG_VM_EVENT_COUNTERS=y
-# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
+# CONFIG_MMAP_ALLOW_UNINITIALIZED is not set
 # CONFIG_PROFILING is not set
-# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
 
 #
 # GCOV-based kernel profiling
@@ -116,14 +121,41 @@ CONFIG_LBDAF=y
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
 # CONFIG_DEFAULT_DEADLINE is not set
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_INLINE_SPIN_TRYLOCK is not set
+# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK is not set
+# CONFIG_INLINE_SPIN_LOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
+CONFIG_INLINE_SPIN_UNLOCK=y
+# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
+CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
+# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_READ_TRYLOCK is not set
+# CONFIG_INLINE_READ_LOCK is not set
+# CONFIG_INLINE_READ_LOCK_BH is not set
+# CONFIG_INLINE_READ_LOCK_IRQ is not set
+# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
+CONFIG_INLINE_READ_UNLOCK=y
+# CONFIG_INLINE_READ_UNLOCK_BH is not set
+CONFIG_INLINE_READ_UNLOCK_IRQ=y
+# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_WRITE_TRYLOCK is not set
+# CONFIG_INLINE_WRITE_LOCK is not set
+# CONFIG_INLINE_WRITE_LOCK_BH is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
+CONFIG_INLINE_WRITE_UNLOCK=y
+# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
+CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
+# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
+# CONFIG_MUTEX_SPIN_ON_OWNER is not set
 # CONFIG_FREEZER is not set
 
 #
@@ -132,7 +164,10 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 CONFIG_PLATFORM_GENERIC=y
 # CONFIG_SELFMOD is not set
 # CONFIG_OPT_LIB_FUNCTION is not set
-# CONFIG_ALLOW_EDIT_AUTO is not set
+
+#
+# Definitions for MICROBLAZE0
+#
 CONFIG_KERNEL_BASE_ADDR=0x90000000
 CONFIG_XILINX_MICROBLAZE0_FAMILY="virtex5"
 CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
@@ -190,7 +225,6 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
 CONFIG_NOMMU_INITIAL_TRIM_EXCESS=1
 
 #
@@ -274,9 +308,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_AF_RXRPC is not set
 CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
-CONFIG_CFG80211_DEFAULT_PS_VALUE=0
-CONFIG_WIRELESS_OLD_REGULATORY=y
-# CONFIG_WIRELESS_EXT is not set
 # CONFIG_LIB80211 is not set
 
 #
@@ -301,9 +332,9 @@ CONFIG_STANDALONE=y
 # CONFIG_CONNECTOR is not set
 CONFIG_MTD=y
 # CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_TESTS is not set
 CONFIG_MTD_CONCAT=y
 CONFIG_MTD_PARTITIONS=y
-# CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
 CONFIG_MTD_CMDLINE_PARTS=y
 # CONFIG_MTD_OF_PARTS is not set
@@ -387,6 +418,10 @@ CONFIG_OF_DEVICE=y
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 # CONFIG_BLK_DEV_LOOP is not set
+
+#
+# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
+#
 CONFIG_BLK_DEV_NBD=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
@@ -423,7 +458,6 @@ CONFIG_NETDEVICES=y
 # CONFIG_PHYLIB is not set
 CONFIG_NET_ETHERNET=y
 # CONFIG_MII is not set
-# CONFIG_ETHOC is not set
 # CONFIG_DNET is not set
 # CONFIG_IBM_NEW_EMAC_ZMII is not set
 # CONFIG_IBM_NEW_EMAC_RGMII is not set
@@ -433,12 +467,12 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+# CONFIG_KS8851_MLL is not set
 # CONFIG_XILINX_EMACLITE is not set
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
 CONFIG_WLAN=y
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
+# CONFIG_HOSTAP is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -482,6 +516,7 @@ CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -508,7 +543,6 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
 # CONFIG_THERMAL is not set
-# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
 
 #
@@ -616,7 +650,6 @@ CONFIG_INOTIFY_USER=y
 CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
 # CONFIG_HUGETLB_PAGE is not set
 # CONFIG_CONFIGFS_FS is not set
 CONFIG_MISC_FILESYSTEMS=y
@@ -672,11 +705,13 @@ CONFIG_MSDOS_PARTITION=y
 #
 # Kernel hacking
 #
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 # CONFIG_PRINTK_TIME is not set
 CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=1024
 # CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_FS=y
 # CONFIG_HEADERS_CHECK is not set
@@ -695,12 +730,16 @@ CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
 CONFIG_DEBUG_OBJECTS_FREE=y
 CONFIG_DEBUG_OBJECTS_TIMERS=y
+# CONFIG_DEBUG_OBJECTS_WORK is not set
 CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
 # CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
@@ -720,8 +759,28 @@ CONFIG_DEBUG_SG=y
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
 # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
+CONFIG_FTRACE=y
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_ENABLE_DEFAULT_TRACERS is not set
+# CONFIG_BOOT_TRACER is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+# CONFIG_PROFILE_ALL_BRANCHES is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_EARLY_PRINTK=y
@@ -734,7 +793,11 @@ CONFIG_EARLY_PRINTK=y
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
 # CONFIG_SECURITYFS is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
 CONFIG_CRYPTO=y
 
 #

From e402746a945ceb9d0486a8e3d5917c9228fa4404 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 25 Jan 2010 11:20:19 +0000
Subject: [PATCH 191/640] GFS2: Wait for unlock completion on umount

This patch adds a wait on umount between the point at which we
dispose of all glocks and the point at which we unmount the
lock protocol. This ensures that we've received all the replies
to our unlock requests before we stop the locking.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
 fs/gfs2/incore.h     | 2 ++
 fs/gfs2/lock_dlm.c   | 7 ++++++-
 fs/gfs2/ops_fstype.c | 2 ++
 fs/gfs2/super.c      | 3 +++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4792200978c8..bc0ad158e6b4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -544,6 +544,8 @@ struct gfs2_sbd {
 	struct gfs2_holder sd_live_gh;
 	struct gfs2_glock *sd_rename_gl;
 	struct gfs2_glock *sd_trans_gl;
+	wait_queue_head_t sd_glock_wait;
+	atomic_t sd_glock_disposal;
 
 	/* Inode Stuff */
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 46df988323bc..cdd0755d7823 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -21,6 +21,7 @@ static void gdlm_ast(void *arg)
 {
 	struct gfs2_glock *gl = arg;
 	unsigned ret = gl->gl_state;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 
 	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
@@ -30,6 +31,8 @@ static void gdlm_ast(void *arg)
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
 		kmem_cache_free(gfs2_glock_cachep, gl);
+		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+			wake_up(&sdp->sd_glock_wait);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
 		ret |= LM_OUT_CANCELED;
@@ -167,7 +170,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
 static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
 {
 	struct gfs2_glock *gl = ptr;
-	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
@@ -183,6 +187,7 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
 		       (unsigned long long)gl->gl_name.ln_number, error);
 		return;
 	}
+	atomic_inc(&sdp->sd_glock_disposal);
 }
 
 static void gdlm_cancel(struct gfs2_glock *gl)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24f3636..9390fc7d8d40 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -82,6 +82,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	gfs2_tune_init(&sdp->sd_tune);
 
+	init_waitqueue_head(&sdp->sd_glock_wait);
+	atomic_set(&sdp->sd_glock_disposal, 0);
 	spin_lock_init(&sdp->sd_statfs_spin);
 
 	spin_lock_init(&sdp->sd_rindex_spin);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c282ad41f3d1..66242b32db5b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -21,6 +21,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/time.h>
+#include <linux/wait.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -860,6 +861,8 @@ restart:
 	gfs2_jindex_free(sdp);
 	/*  Take apart glock structures and buffer lists  */
 	gfs2_gl_hash_clear(sdp);
+	/* Wait for dlm to reply to all our unlock requests */
+	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
 	/*  Unmount the locking protocol  */
 	gfs2_lm_unmount(sdp);
 

From 8f05228ee7c8f409ae3c6f9c3e13d7ccb9c18360 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 29 Jan 2010 15:21:27 +0000
Subject: [PATCH 192/640] GFS2: Extend umount wait coverage to full glock
 lifetime

Although all glocks are, by the time of the umount glock wait,
scheduled for demotion, some of them haven't made it far
enough through the process for the original set of waiting
code to wait for them.

This extends the ref count to the whole glock lifetime in order
to ensure that the waiting does catch all glocks. It does make
it a bit more invasive, but it seems the only sensible solution
at the moment.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c      |  4 ++++
 fs/gfs2/glock.h      |  2 +-
 fs/gfs2/lock_dlm.c   |  6 +++---
 fs/gfs2/ops_fstype.c | 10 +++++++++-
 fs/gfs2/super.c      |  2 --
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f455a03a09e2..f42663325931 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	if (!gl)
 		return -ENOMEM;
 
+	atomic_inc(&sdp->sd_glock_disposal);
 	gl->gl_flags = 0;
 	gl->gl_name = name;
 	atomic_set(&gl->gl_ref, 1);
@@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 		up_write(&gfs2_umount_flush_sem);
 		msleep(10);
 	}
+	flush_workqueue(glock_workqueue);
+	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
+	gfs2_dump_lockstate(sdp);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 13f0bd228132..c0262faf4725 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -123,7 +123,7 @@ struct lm_lockops {
 	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
  	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
-	void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
+	void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
 	unsigned int (*lm_lock) (struct gfs2_glock *gl,
 				 unsigned int req_state, unsigned int flags);
 	void (*lm_cancel) (struct gfs2_glock *gl);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index cdd0755d7823..0e5e0e7022e5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -167,15 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
 	return LM_OUT_ASYNC;
 }
 
-static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
+static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
 {
-	struct gfs2_glock *gl = ptr;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
 		kmem_cache_free(cachep, gl);
+		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+			wake_up(&sdp->sd_glock_wait);
 		return;
 	}
 
@@ -187,7 +188,6 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
 		       (unsigned long long)gl->gl_name.ln_number, error);
 		return;
 	}
-	atomic_inc(&sdp->sd_glock_disposal);
 }
 
 static void gdlm_cancel(struct gfs2_glock *gl)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 9390fc7d8d40..8a102f731003 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -985,9 +985,17 @@ static const match_table_t nolock_tokens = {
 	{ Opt_err, NULL },
 };
 
+static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	kmem_cache_free(cachep, gl);
+	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+		wake_up(&sdp->sd_glock_wait);
+}
+
 static const struct lm_lockops nolock_ops = {
 	.lm_proto_name = "lock_nolock",
-	.lm_put_lock = kmem_cache_free,
+	.lm_put_lock = nolock_put_lock,
 	.lm_tokens = &nolock_tokens,
 };
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 66242b32db5b..b9dd3da22c0a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -861,8 +861,6 @@ restart:
 	gfs2_jindex_free(sdp);
 	/*  Take apart glock structures and buffer lists  */
 	gfs2_gl_hash_clear(sdp);
-	/* Wait for dlm to reply to all our unlock requests */
-	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
 	/*  Unmount the locking protocol  */
 	gfs2_lm_unmount(sdp);
 

From 58424a49cb99c4ad9386b47f885b352476313a02 Mon Sep 17 00:00:00 2001
From: "Steven J. Magnani" <steve@digidescorp.com>
Date: Mon, 1 Feb 2010 06:34:45 -0600
Subject: [PATCH 193/640] microblaze: fix interrupt state restore

Interrupts must be disabled while an interrupt state restore
(prep for interrupt return) is in progress.
Code to do this was lost in the port to the mainline kernel.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/entry-nommu.S | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S
index 95b0855802df..391d6197fc3b 100644
--- a/arch/microblaze/kernel/entry-nommu.S
+++ b/arch/microblaze/kernel/entry-nommu.S
@@ -122,7 +122,7 @@ ENTRY(_interrupt)
 
 ret_from_intr:
 	lwi	r11, r1, PT_MODE
-	bneid	r11, 3f
+	bneid	r11, no_intr_resched
 
 	lwi	r6, r31, TS_THREAD_INFO	/* get thread info */
 	lwi	r19, r6, TI_FLAGS	/* get flags in thread info */
@@ -133,16 +133,18 @@ ret_from_intr:
 	bralid	r15, schedule
 	nop
 1:	andi	r11, r19, _TIF_SIGPENDING
-	beqid	r11, no_intr_reshed
+	beqid	r11, no_intr_resched
 	addk	r5, r1, r0
 	addk	r7, r0, r0
 	bralid	r15, do_signal
 	addk	r6, r0, r0
 
-no_intr_reshed:
+no_intr_resched:
+	/* Disable interrupts, we are now committed to the state restore */
+	disable_irq
+
 	/* save mode indicator */
 	lwi	r11, r1, PT_MODE
-3:
 	swi	r11, r0, PER_CPU(KM)
 
 	/* save r31 */

From 9f557cd8073104b39528794d44e129331ded649f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 3 Feb 2010 08:27:22 -0500
Subject: [PATCH 194/640] NFS: Fix an Oops when truncating a file

The VM/VFS does not allow mapping->a_ops->invalidatepage() to fail.
Unfortunately, nfs_wb_page_cancel() may fail if a fatal signal occurs.
Since the NFS code assumes that the page stays mapped for as long as the
writeback is active, we can end up Oopsing (among other things).

The only safe fix here is to convert nfs_wait_on_request(), so as to make
it uninterruptible (as is already the case with wait_on_page_writeback()).


Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/pagelist.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e2975939126a..a12c45b65dd4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req)
 	kref_put(&req->wb_kref, nfs_free_request);
 }
 
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
 /**
  * nfs_wait_on_request - Wait for a request to complete.
  * @req: request to wait upon.
@@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-	int ret = 0;
-
-	if (!test_bit(PG_BUSY, &req->wb_flags))
-		goto out;
-	ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
-			nfs_wait_bit_killable, TASK_KILLABLE);
-out:
-	return ret;
+	return wait_on_bit(&req->wb_flags, PG_BUSY,
+			nfs_wait_bit_uninterruptible,
+			TASK_UNINTERRUPTIBLE);
 }
 
 /**

From 387c149b54b4321cbc790dadbd4f8eedb5a90468 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 3 Feb 2010 08:27:35 -0500
Subject: [PATCH 195/640] NFS: Fix a umount race

Ensure that we unregister the bdi before kill_anon_super() calls
ida_remove() on our device name.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/super.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc5508..f1afee4eea77 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -243,6 +243,7 @@ static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static void nfs_put_super(struct super_block *);
 static void nfs_kill_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
@@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs4_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -2257,6 +2260,17 @@ error_splat_super:
 	goto out;
 }
 
+/*
+ * Ensure that we unregister the bdi before kill_anon_super
+ * releases the device name
+ */
+static void nfs_put_super(struct super_block *s)
+{
+	struct nfs_server *server = NFS_SB(s);
+
+	bdi_unregister(&server->backing_dev_info);
+}
+
 /*
  * Destroy an NFS2/3 superblock
  */
@@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s)
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-	bdi_unregister(&server->backing_dev_info);
 	nfs_fscache_release_super_cookie(s);
 	nfs_free_server(server);
 }

From 9b4b351346b41d923d69adec865814fdaac4dba9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 3 Feb 2010 08:27:35 -0500
Subject: [PATCH 196/640] NFS: Don't clobber the attribute type in
 nfs_update_inode()

If the NFS_ATTR_FATTR_TYPE field isn't set in fattr->valid, then we should
not set the S_IFMT part of inode->i_mode.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index faa091865ad0..f141bde7756a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	if (fattr->valid & NFS_ATTR_FATTR_MODE) {
 		if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
+			umode_t newmode = inode->i_mode & S_IFMT;
+			newmode |= fattr->mode & S_IALLUGO;
+			inode->i_mode = newmode;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-			inode->i_mode = fattr->mode;
 		}
 	} else if (server->caps & NFS_CAP_MODE)
 		invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR

From 5ecb01cfdf96c5f465192bdb2a4fd4a61a24c6cc Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Sat, 23 Jan 2010 22:36:29 +0100
Subject: [PATCH 197/640] futex_lock_pi() key refcnt fix

This fixes a futex key reference count bug in futex_lock_pi(),
where a key's reference count is incremented twice but decremented
only once, causing the backing object to not be released.

If the futex is created in a temporary file in an ext3 file system,
this bug causes the file's inode to become an "undead" orphan,
which causes an oops from a BUG_ON() in ext3_put_super() when the
file system is unmounted. glibc's test suite is known to trigger this,
see <http://bugzilla.kernel.org/show_bug.cgi?id=14256>.

The bug is a regression from 2.6.28-git3, namely Peter Zijlstra's
38d47c1b7075bd7ec3881141bb3629da58f88dab "[PATCH] futex: rely on
get_user_pages() for shared futexes". That commit made get_futex_key()
also increment the reference count of the futex key, and updated its
callers to decrement the key's reference count before returning.
Unfortunately the normal exit path in futex_lock_pi() wasn't corrected:
the reference count is incremented by get_futex_key() and queue_lock(),
but the normal exit path only decrements once, via unqueue_me_pi().
The fix is to put_futex_key() after unqueue_me_pi(), since 2.6.31
this is easily done by 'goto out_put_key' rather than 'goto out'.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@kernel.org>
---
 kernel/futex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index d9b3a2228f9d..17828033a639 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1971,7 +1971,7 @@ retry_private:
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
 
-	goto out;
+	goto out_put_key;
 
 out_unlock_put_key:
 	queue_unlock(&q, hb);

From 51246bfd189064079c54421507236fd2723b18f3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 2 Feb 2010 11:40:27 +0100
Subject: [PATCH 198/640] futex: Handle user space corruption gracefully

If the owner of a PI futex dies we fix up the pi_state and set
pi_state->owner to NULL. When a malicious or just sloppy programmed
user space application sets the futex value to 0 e.g. by calling
pthread_mutex_init(), then the futex can be acquired again. A new
waiter manages to enqueue itself on the pi_state w/o damage, but on
unlock the kernel dereferences pi_state->owner and oopses.

Prevent this by checking pi_state->owner in the unlock path. If
pi_state->owner is not current we know that user space manipulated the
futex value. Ignore the mess and return -EINVAL.

This catches the above case and also the case where a task hijacks the
futex by setting the tid value and then tries to unlock it.

Reported-by: Jermome Marchand <jmarchan@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Darren Hart <dvhltc@us.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org>
---
 kernel/futex.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/futex.c b/kernel/futex.c
index 17828033a639..06e8240d2abe 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -758,6 +758,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
+	/*
+	 * If current does not own the pi_state then the futex is
+	 * inconsistent and user space fiddled with the futex value.
+	 */
+	if (pi_state->owner != current)
+		return -EINVAL;
+
 	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 

From 59647b6ac3050dd964bc556fe6ef22f4db5b935c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 3 Feb 2010 09:33:05 +0100
Subject: [PATCH 199/640] futex: Handle futex value corruption gracefully

The WARN_ON in lookup_pi_state which complains about a mismatch
between pi_state->owner->pid and the pid which we retrieved from the
user space futex is completely bogus.

The code just emits the warning and then continues despite the fact
that it detected an inconsistent state of the futex. A conveniant way
for user space to spam the syslog.

Replace the WARN_ON by a consistency check. If the values do not match
return -EINVAL and let user space deal with the mess it created.

This also fixes the missing task_pid_vnr() when we compare the
pi_state->owner pid with the futex value.

Reported-by: Jermome Marchand <jmarchan@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Darren Hart <dvhltc@us.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org>
---
 kernel/futex.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 06e8240d2abe..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 				return -EINVAL;
 
 			WARN_ON(!atomic_read(&pi_state->refcount));
-			WARN_ON(pid && pi_state->owner &&
-				pi_state->owner->pid != pid);
+
+			/*
+			 * When pi_state->owner is NULL then the owner died
+			 * and another waiter is on the fly. pi_state->owner
+			 * is fixed up by the task which acquires
+			 * pi_state->rt_mutex.
+			 *
+			 * We do not check for pid == 0 which can happen when
+			 * the owner died and robust_list_exit() cleared the
+			 * TID.
+			 */
+			if (pid && pi_state->owner) {
+				/*
+				 * Bail out if user space manipulated the
+				 * futex value.
+				 */
+				if (pid != task_pid_vnr(pi_state->owner))
+					return -EINVAL;
+			}
 
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;

From 4aba098c8d64329f0c4b24d12e1dc5398dd41a75 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 3 Feb 2010 15:48:03 +0000
Subject: [PATCH 200/640] ARM: Fix wrong register in proc-arm6_7.S data abort
 handler

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-arm6_7.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index 3f9cd3d8f6d5..795dc615f43b 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -41,7 +41,7 @@ ENTRY(cpu_arm7_dcache_clean_area)
 ENTRY(cpu_arm7_data_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
-	ldr	r8, [r0]			@ read arm instruction
+	ldr	r8, [r2]			@ read arm instruction
 	tst	r8, #1 << 20			@ L = 0 -> write?
 	orreq	r1, r1, #1 << 11		@ yes.
 	and	r7, r8, #15 << 24

From faccbcfb63af006e100d5b3b513131fe27aa66ab Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 29 Jan 2010 14:20:05 -0800
Subject: [PATCH 201/640] omap: Remove old unused defines for
 OMAP_32KSYNCT_BASE

Remove old unused defines for OMAP_32KSYNCT_BASE

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/plat-omap/omap_device.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index 1e5648d3e3d8..2ed72013c2e2 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -89,16 +89,6 @@
 #define USE_WAKEUP_LAT			0
 #define IGNORE_WAKEUP_LAT		1
 
-/* XXX this should be moved into a separate file */
-#if defined(CONFIG_ARCH_OMAP2420)
-# define OMAP_32KSYNCT_BASE		0x48004000
-#elif defined(CONFIG_ARCH_OMAP2430)
-# define OMAP_32KSYNCT_BASE		0x49020000
-#elif defined(CONFIG_ARCH_OMAP3430)
-# define OMAP_32KSYNCT_BASE		0x48320000
-#else
-# error Unknown OMAP device
-#endif
 
 /* Private functions */
 

From 9af915da20bd405c232ebb93c3cb80c6d92a12f6 Mon Sep 17 00:00:00 2001
From: Sriram <srk@ti.com>
Date: Fri, 29 Jan 2010 14:20:05 -0800
Subject: [PATCH 202/640] ARCH OMAP : enable ARCH_HAS_HOLES_MEMORYMODEL for
 OMAP

OMAP platforms(like OMAP3530) include DSP or other co-processors
for media acceleration. when carving out memory for the
accelerators we can end up creating a hole in the memory map
of sort:
<kernel memory><hole(memory for accelerator)><kernel memory>

To handle such a memory configuration ARCH_HAS_HOLES_MEMORYMODEL
has to be enabled. For further information refer discussion at:
http://www.mail-archive.com/linux-omap@vger.kernel.org/msg15262.html.

Signed-off-by: Sriramakrishnan <srk@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c33ca82f9b1..184a6bd54825 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -702,6 +702,7 @@ config ARCH_OMAP
 	select ARCH_HAS_CPUFREQ
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
+	select ARCH_HAS_HOLES_MEMORYMODEL
 	help
 	  Support for TI's OMAP platform (OMAP1 and OMAP2).
 

From 74005a2b116203f618fe784d88ad7e6071bb1554 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 29 Jan 2010 14:20:06 -0800
Subject: [PATCH 203/640] OMAP2/3: IRQ: ensure valid base address

Ensure valid base address during IRQ init.  Fixes compiler warning
about potential use of uninitialized variable.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/irq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c
index 27054025da2b..26aeef560aa3 100644
--- a/arch/arm/mach-omap2/irq.c
+++ b/arch/arm/mach-omap2/irq.c
@@ -194,7 +194,7 @@ void __init omap_init_irq(void)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(irq_banks); i++) {
-		unsigned long base;
+		unsigned long base = 0;
 		struct omap_irq_bank *bank = irq_banks + i;
 
 		if (cpu_is_omap24xx())
@@ -202,6 +202,8 @@ void __init omap_init_irq(void)
 		else if (cpu_is_omap34xx())
 			base = OMAP34XX_IC_BASE;
 
+		BUG_ON(!base);
+
 		/* Static mapping, never released */
 		bank->base_reg = ioremap(base, SZ_4K);
 		if (!bank->base_reg) {

From 8d08436d782d177747a0fac1e1455a44b932b7c6 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Fri, 29 Jan 2010 14:20:06 -0800
Subject: [PATCH 204/640] OMAP2/3: GPMC: ensure valid clock pointer

Ensure valid clock pointer during GPMC init.  Fixes compiler
warning about potential use of uninitialized variable.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/gpmc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 3f1334f62e7a..7027cdc1ba49 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -505,7 +505,7 @@ static void __init gpmc_mem_init(void)
 void __init gpmc_init(void)
 {
 	u32 l;
-	char *ck;
+	char *ck = NULL;
 
 	if (cpu_is_omap24xx()) {
 		ck = "core_l3_ck";
@@ -521,6 +521,9 @@ void __init gpmc_init(void)
 		l = OMAP44XX_GPMC_BASE;
 	}
 
+	if (WARN_ON(!ck))
+		return;
+
 	gpmc_l3_clk = clk_get(NULL, ck);
 	if (IS_ERR(gpmc_l3_clk)) {
 		printk(KERN_ERR "Could not get GPMC clock %s\n", ck);

From 9ecef433204f9b06550dd45cf84f14706f8fe4f0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 1 Feb 2010 11:22:54 -0800
Subject: [PATCH 205/640] omap: Fix 3630 mux errors

3630 has more mux signals than 34xx. The additional pins
exist in omap36xx_cbp_subset, but are not initialized
as the superset is missing these offsets. This causes
the following errors during the boot:

mux: Unknown entry offset 0x236
mux: Unknown entry offset 0x22e
mux: Unknown entry offset 0x1ec
mux: Unknown entry offset 0x1ee
mux: Unknown entry offset 0x1f4
mux: Unknown entry offset 0x1f6
mux: Unknown entry offset 0x1f8
mux: Unknown entry offset 0x1fa
mux: Unknown entry offset 0x1fc
mux: Unknown entry offset 0x22a
mux: Unknown entry offset 0x226
mux: Unknown entry offset 0x230
mux: Unknown entry offset 0x22c
mux: Unknown entry offset 0x228

Fix this by adding the missing offsets to omap3 superset.
Note that additionally the uninitialized pins need to be
skipped on 34xx.

Based on an earlier patch by Allen Pais <allen.pais@ti.com>.

Reported-by: Allen Pais <allen.pais@ti.com>
Signed-off-by: Allen Pais <allen.pais@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/mux.c     |  7 ++++++
 arch/arm/mach-omap2/mux34xx.c | 47 +++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 3f59bd12cbbf..19001dd8dd7e 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -968,6 +968,13 @@ static void __init omap_mux_init_list(struct omap_mux *superset)
 		}
 #endif
 
+#if defined(CONFIG_OMAP_MUX) && defined(CONFIG_DEBUG_FS)
+		if (!superset->muxnames || !superset->muxnames[0]) {
+			superset++;
+			continue;
+		}
+#endif
+
 		entry = omap_mux_list_add(superset);
 		if (!entry) {
 			printk(KERN_ERR "mux: Could not add entry\n");
diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c
index 68e0a595f9a1..07aa7b3c95f7 100644
--- a/arch/arm/mach-omap2/mux34xx.c
+++ b/arch/arm/mach-omap2/mux34xx.c
@@ -649,6 +649,53 @@ static struct omap_mux __initdata omap3_muxmodes[] = {
 	_OMAP3_MUXENTRY(UART3_TX_IRTX, 166,
 		"uart3_tx_irtx", NULL, NULL, NULL,
 		"gpio_166", NULL, NULL, "safe_mode"),
+
+	/* Only on 3630, see omap36xx_cbp_subset for the signals */
+	_OMAP3_MUXENTRY(GPMC_A11, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MBUSFLAG, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MREAD, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MWRITE, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_SBUSFLAG, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_SREAD, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_SWRITE, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(GPMC_A11, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD28, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD29, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD32, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD33, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD34, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD35, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
+	_OMAP3_MUXENTRY(SAD2D_MCAD36, 0,
+		NULL, NULL, NULL, NULL,
+		NULL, NULL, NULL, NULL),
 	{ .reg_offset = OMAP_MUX_TERMINATOR },
 };
 

From 78737ae1b0f0b425e8eb72a9c84125fa8cac8e8e Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 1 Feb 2010 13:03:42 -0800
Subject: [PATCH 206/640] omap: Fix arch/arm/mach-omap2/mux.c: Off by one error

David Binderman ran the sourceforge tool cppcheck over the source code of the
new Linux kernel 2.6.33-rc6:

[./arm/mach-omap2/mux.c:492]: (error) Buffer access out-of-bounds

13 characters + 1 digit + 1 zero byte is more than 14 characters.

Also add a comment on mode0 name length in case new omaps
start using longer names.

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/mux.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 19001dd8dd7e..5fedc50c58e4 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -486,7 +486,7 @@ int __init omap_mux_init_signal(char *muxname, int val)
 static inline void omap_mux_decode(struct seq_file *s, u16 val)
 {
 	char *flags[OMAP_MUX_MAX_NR_FLAGS];
-	char mode[14];
+	char mode[sizeof("OMAP_MUX_MODE") + 1];
 	int i = -1;
 
 	sprintf(mode, "OMAP_MUX_MODE%d", val & 0x7);
@@ -553,6 +553,7 @@ static int omap_mux_dbg_board_show(struct seq_file *s, void *unused)
 		if (!m0_name)
 			continue;
 
+		/* REVISIT: Needs to be updated if mode0 names get longer */
 		for (i = 0; i < OMAP_MUX_DEFNAME_LEN; i++) {
 			if (m0_name[i] == '\0') {
 				m0_def[i] = m0_name[i];

From 0825cc8a6ffa54c87ad7ad914a16d6c035627935 Mon Sep 17 00:00:00 2001
From: Marek Skuczynski <mareksk7@gmail.com>
Date: Sun, 31 Jan 2010 10:00:54 +0000
Subject: [PATCH 207/640] omap: Fix access to already released memory in
 clk_debugfs_register_one()

I have found an access to already released memory in
clk_debugfs_register_one() function.

Signed-off-by: Marek Skuczynski <mareksk7@gmail.com>
Acked-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/plat-omap/clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c
index d9f8c844c385..4becbdd1935c 100644
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@@ -391,7 +391,7 @@ static struct dentry *clk_debugfs_root;
 static int clk_debugfs_register_one(struct clk *c)
 {
 	int err;
-	struct dentry *d, *child;
+	struct dentry *d, *child, *child_tmp;
 	struct clk *pa = c->parent;
 	char s[255];
 	char *p = s;
@@ -423,7 +423,7 @@ static int clk_debugfs_register_one(struct clk *c)
 
 err_out:
 	d = c->dent;
-	list_for_each_entry(child, &d->d_subdirs, d_u.d_child)
+	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
 		debugfs_remove(child);
 	debugfs_remove(c->dent);
 	return err;

From 301fe8eeee02c570c5bd30537aff9456f7f7955c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 1 Feb 2010 12:34:31 -0800
Subject: [PATCH 208/640] omap: Disable serial port autoidle by default

Currently the omap serial clocks are autoidled after 5 seconds.
However, this causes lost characters on the serial ports. As this
is considered non-standard behaviour for Linux, disable the timeout.

Note that this will also cause blocking of any deeper omap sleep
states.

To enable the autoidling of the serial ports, do something like
this for each serial port:

# echo 5 > /sys/devices/platform/serial8250.0/sleep_timeout
# echo 5 > /sys/devices/platform/serial8250.1/sleep_timeout
...

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/serial.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 8c964bec8159..e10a02df6e1d 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -36,7 +36,13 @@
 #define UART_OMAP_NO_EMPTY_FIFO_READ_IP_REV	0x52
 #define UART_OMAP_WER		0x17	/* Wake-up enable register */
 
-#define DEFAULT_TIMEOUT (5 * HZ)
+/*
+ * NOTE: By default the serial timeout is disabled as it causes lost characters
+ * over the serial ports. This means that the UART clocks will stay on until
+ * disabled via sysfs. This also causes that any deeper omap sleep states are
+ * blocked. 
+ */
+#define DEFAULT_TIMEOUT 0
 
 struct omap_uart_state {
 	int num;
@@ -422,7 +428,8 @@ static void omap_uart_idle_init(struct omap_uart_state *uart)
 	uart->timeout = DEFAULT_TIMEOUT;
 	setup_timer(&uart->timer, omap_uart_idle_timer,
 		    (unsigned long) uart);
-	mod_timer(&uart->timer, jiffies + uart->timeout);
+	if (uart->timeout)
+		mod_timer(&uart->timer, jiffies + uart->timeout);
 	omap_uart_smart_idle_enable(uart, 0);
 
 	if (cpu_is_omap34xx()) {

From b9c3032277f756e73f6c673419dc414155e04e46 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Feb 2010 18:08:52 +0100
Subject: [PATCH 209/640] hrtimer, softirq: Fix hrtimer->softirq trampoline

hrtimers callbacks are always done from hardirq context, either the
jiffy tick interrupt or the hrtimer device interrupt.

[ there is currently one exception that can still call a hrtimer
  callback from softirq, but even in that case this will still
  work correctly. ]

Reported-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Yury Polyanskiy <ypolyans@princeton.edu>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: David S. Miller <davem@davemloft.net>
LKML-Reference: <1265120401.24455.306.camel@laptop>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/softirq.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index a09502e2ef75..7c1a67ef0274 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill);
  */
 
 /*
- * The trampoline is called when the hrtimer expires. If this is
- * called from the hrtimer interrupt then we schedule the tasklet as
- * the timer callback function expects to run in softirq context. If
- * it's called in softirq context anyway (i.e. high resolution timers
- * disabled) then the hrtimer callback is called right away.
+ * The trampoline is called when the hrtimer expires. It schedules a tasklet
+ * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
+ * hrtimer callback, but from softirq context.
  */
 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
 {
 	struct tasklet_hrtimer *ttimer =
 		container_of(timer, struct tasklet_hrtimer, timer);
 
-	if (hrtimer_is_hres_active(timer)) {
-		tasklet_hi_schedule(&ttimer->tasklet);
-		return HRTIMER_NORESTART;
-	}
-	return ttimer->function(timer);
+	tasklet_hi_schedule(&ttimer->tasklet);
+	return HRTIMER_NORESTART;
 }
 
 /*

From 1038a00b458997661bcd0e780a24dc280a8841fc Mon Sep 17 00:00:00 2001
From: Nick Pelly <npelly@google.com>
Date: Wed, 3 Feb 2010 11:42:26 -0800
Subject: [PATCH 210/640] Bluetooth: Fallback eSCO to SCO on error 0x1a
 (Unsupported Remote Feature)

General Motors carkits that use LGE BT chipsets return this error code
when an eSCO is attempted, despite advertising eSCO support.

2009-08-13 14:41:39.755518 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17
   handle 1 voice setting 0x0060
2009-08-13 14:41:39.757563 > HCI Event: Command Status (0x0f) plen 4
   Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1
2009-08-13 14:41:39.789484 > HCI Event: Synchronous Connect Complete (0x2c) plen 17
   status 0x1a handle 257 bdaddr 00:1E:B2:23:5E:B3 type eSCO
   Error: Unsupported Remote Feature / Unsupported LMP Feature

Signed-off-by: Jaikumar Ganesh <jaikumar@google.com>
Signed-off-by: Nick Pelly <npelly@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 28517bad796c..592da5c909c1 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1699,6 +1699,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 		break;
 
 	case 0x1c:	/* SCO interval rejected */
+	case 0x1a:	/* Unsupported Remote Feature */
 	case 0x1f:	/* Unspecified error */
 		if (conn->out && conn->attempt < 2) {
 			conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |

From b6c3f5be7c6ac3375f44de4545c1ffe216b34022 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 2 Feb 2010 10:08:19 -0600
Subject: [PATCH 211/640] b43: Fix throughput regression

Commit c7ab5ef9bcd281135c21b4732c9be779585181be entitled "b43: implement
short slot and basic rate handling" reduced the transmit throughput for
my BCM4311 device from 18 Mb/s to 0.7 Mb/s. The basic rate handling
portion is OK, the problem is in the short slot handling.

Prior to this change, the short slot enable/disable routines were never
called. Experimentation showed that the critical part was changing the
value at offset 0x0010 in the shared memory. This is supposed to contain
the 802.11 Slot Time in usec, but if it is changed from its initial value
of zero, performance is destroyed. On the other hand, changing the value
in the MMIO register corresponding to the Interframe Slot Time increased
performance from 18 to 22 Mb/s. A BCM4306/3 also shows dramatic
improvement of the transmit rate from 5.3 to 19.0 Mb/s.

Other changes in the patch include removal of the magic number for the
MMIO register, and allowing the slot time to be set for any PHY operating
in the 2.4 GHz band. Previously, the routine was executed only for G PHYs.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@kernel.org> [Any stable version back through 2.6.28]
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/b43.h  |  1 +
 drivers/net/wireless/b43/main.c | 13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h
index fe3bf9491997..c484cc253892 100644
--- a/drivers/net/wireless/b43/b43.h
+++ b/drivers/net/wireless/b43/b43.h
@@ -115,6 +115,7 @@
 #define B43_MMIO_TSF_2			0x636	/* core rev < 3 only */
 #define B43_MMIO_TSF_3			0x638	/* core rev < 3 only */
 #define B43_MMIO_RNG			0x65A
+#define B43_MMIO_IFSSLOT		0x684	/* Interframe slot time */
 #define B43_MMIO_IFSCTL			0x688 /* Interframe space control */
 #define  B43_MMIO_IFSCTL_USE_EDCF	0x0004
 #define B43_MMIO_POWERUP_DELAY		0x6A8
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 19b4eae47b59..fcbf0e27d9f3 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -628,10 +628,17 @@ static void b43_upload_card_macaddress(struct b43_wldev *dev)
 static void b43_set_slot_time(struct b43_wldev *dev, u16 slot_time)
 {
 	/* slot_time is in usec. */
-	if (dev->phy.type != B43_PHYTYPE_G)
+	/* This test used to exit for all but a G PHY. */
+	if (b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ)
 		return;
-	b43_write16(dev, 0x684, 510 + slot_time);
-	b43_shm_write16(dev, B43_SHM_SHARED, 0x0010, slot_time);
+	b43_write16(dev, B43_MMIO_IFSSLOT, 510 + slot_time);
+	/* Shared memory location 0x0010 is the slot time and should be
+	 * set to slot_time; however, this register is initially 0 and changing
+	 * the value adversely affects the transmit rate for BCM4311
+	 * devices. Until this behavior is unterstood, delete this step
+	 *
+	 * b43_shm_write16(dev, B43_SHM_SHARED, 0x0010, slot_time);
+	 */
 }
 
 static void b43_short_slot_timing_enable(struct b43_wldev *dev)

From 391ae22ae5726d2a8cebfa62879635c54a349642 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Wed, 3 Feb 2010 18:24:35 +0100
Subject: [PATCH 212/640] ssb: Fix CONFIG_SSB_SDIOHOST typo

This fixes a CONFIG_SSB_SDIOHOST typo.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Reported-by: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Tested-By: Albert Herranz <albert_herranz@yahoo.es>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 5681ebed9c65..03dfd27c4bfb 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -494,8 +494,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
 #endif
 			break;
 		case SSB_BUSTYPE_SDIO:
-#ifdef CONFIG_SSB_SDIO
-			sdev->irq = bus->host_sdio->dev.irq;
+#ifdef CONFIG_SSB_SDIOHOST
 			dev->parent = &bus->host_sdio->dev;
 #endif
 			break;

From 485f1eff73a7b932fd3abb0dfcf804e1a1f59025 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 3 Feb 2010 15:52:18 -0800
Subject: [PATCH 213/640] Bluetooth: Fix sleeping function in RFCOMM within
 invalid context

With the commit 9e726b17422bade75fba94e625cd35fd1353e682 the
rfcomm_session_put() gets accidentially called from a timeout
callback and results in this:

BUG: sleeping function called from invalid context at net/core/sock.c:1897
in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper
Pid: 0, comm: swapper Tainted: P           2.6.32 #31
Call Trace:
 <IRQ>  [<ffffffff81036455>] __might_sleep+0xf8/0xfa
 [<ffffffff8138ef1d>] lock_sock_nested+0x29/0xc4
 [<ffffffffa03921b3>] lock_sock+0xb/0xd [l2cap]
 [<ffffffffa03948e6>] l2cap_sock_shutdown+0x1c/0x76 [l2cap]
 [<ffffffff8106adea>] ? clockevents_program_event+0x75/0x7e
 [<ffffffff8106bea2>] ? tick_dev_program_event+0x37/0xa5
 [<ffffffffa0394967>] l2cap_sock_release+0x27/0x67 [l2cap]
 [<ffffffff8138c971>] sock_release+0x1a/0x67
 [<ffffffffa03d2492>] rfcomm_session_del+0x34/0x53 [rfcomm]
 [<ffffffffa03d24c5>] rfcomm_session_put+0x14/0x16 [rfcomm]
 [<ffffffffa03d28b4>] rfcomm_session_timeout+0xe/0x1a [rfcomm]
 [<ffffffff810554a8>] run_timer_softirq+0x1e2/0x29a
 [<ffffffffa03d28a6>] ? rfcomm_session_timeout+0x0/0x1a [rfcomm]
 [<ffffffff8104e0f6>] __do_softirq+0xfe/0x1c5
 [<ffffffff8100e8ce>] ? timer_interrupt+0x1a/0x21
 [<ffffffff8100cc4c>] call_softirq+0x1c/0x28
 [<ffffffff8100e05b>] do_softirq+0x33/0x6b
 [<ffffffff8104daf6>] irq_exit+0x36/0x85
 [<ffffffff8100d7a9>] do_IRQ+0xa6/0xbd
 [<ffffffff8100c493>] ret_from_intr+0x0/0xa
 <EOI>  [<ffffffff812585b3>] ? acpi_idle_enter_bm+0x269/0x294
 [<ffffffff812585a9>] ? acpi_idle_enter_bm+0x25f/0x294
 [<ffffffff81373ddc>] ? cpuidle_idle_call+0x97/0x107
 [<ffffffff8100aca0>] ? cpu_idle+0x53/0xaa
 [<ffffffff81429006>] ? rest_init+0x7a/0x7c
 [<ffffffff8177bc8c>] ? start_kernel+0x389/0x394
 [<ffffffff8177b29c>] ? x86_64_start_reservations+0xac/0xb0
 [<ffffffff8177b384>] ? x86_64_start_kernel+0xe4/0xeb

To fix this, the rfcomm_session_put() needs to be moved out of
rfcomm_session_timeout() into rfcomm_process_sessions(). In that
context it is perfectly fine to sleep and disconnect the socket.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: David John <davidjon@xenontk.org>
---
 net/bluetooth/rfcomm/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index fc5ee3296e22..2b506373957a 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -252,7 +252,6 @@ static void rfcomm_session_timeout(unsigned long arg)
 	BT_DBG("session %p state %ld", s, s->state);
 
 	set_bit(RFCOMM_TIMED_OUT, &s->flags);
-	rfcomm_session_put(s);
 	rfcomm_schedule(RFCOMM_SCHED_TIMEO);
 }
 
@@ -1920,6 +1919,7 @@ static inline void rfcomm_process_sessions(void)
 		if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
 			s->state = BT_DISCONN;
 			rfcomm_send_disc(s, 0);
+			rfcomm_session_put(s);
 			continue;
 		}
 

From 6c2718da59613d76013b501bf0f8bcf9d7794b2d Mon Sep 17 00:00:00 2001
From: Nick Pelly <npelly@google.com>
Date: Wed, 3 Feb 2010 16:18:36 -0800
Subject: [PATCH 214/640] Bluetooth: Do not call rfcomm_session_put() for
 RFCOMM UA on closed socket

When processing a RFCOMM UA frame when the socket is closed and we were
not the RFCOMM initiator would cause rfcomm_session_put() to be called
twice during rfcomm_process_rx(). This would cause a kernel panic in
rfcomm_session_close() then.

This could be easily reproduced during disconnect with devices such as
Motorola H270 that send RFCOMM UA followed quickly by L2CAP disconnect
request. This trace for this looks like:

2009-09-21 17:22:37.788895 < ACL data: handle 1 flags 0x02 dlen 8
   L2CAP(d): cid 0x0041 len 4 [psm 3]
     RFCOMM(s): DISC: cr 0 dlci 20 pf 1 ilen 0 fcs 0x7d
2009-09-21 17:22:37.906204 > HCI Event: Number of Completed Packets (0x13) plen 5
   handle 1 packets 1
2009-09-21 17:22:37.933090 > ACL data: handle 1 flags 0x02 dlen 8
   L2CAP(d): cid 0x0040 len 4 [psm 3]
     RFCOMM(s): UA: cr 0 dlci 20 pf 1 ilen 0 fcs 0x57
2009-09-21 17:22:38.636764 < ACL data: handle 1 flags 0x02 dlen 8
   L2CAP(d): cid 0x0041 len 4 [psm 3]
     RFCOMM(s): DISC: cr 0 dlci 0 pf 1 ilen 0 fcs 0x9c
2009-09-21 17:22:38.744125 > HCI Event: Number of Completed Packets (0x13) plen 5
   handle 1 packets 1
2009-09-21 17:22:38.763687 > ACL data: handle 1 flags 0x02 dlen 8
   L2CAP(d): cid 0x0040 len 4 [psm 3]
     RFCOMM(s): UA: cr 0 dlci 0 pf 1 ilen 0 fcs 0xb6
2009-09-21 17:22:38.783554 > ACL data: handle 1 flags 0x02 dlen 12
   L2CAP(s): Disconn req: dcid 0x0040 scid 0x0041

Avoid calling rfcomm_session_put() twice by skipping this call
in rfcomm_recv_ua() if the socket is closed.

Signed-off-by: Nick Pelly <npelly@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/rfcomm/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 2b506373957a..89f4a59eb82b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1150,7 +1150,11 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 			break;
 
 		case BT_DISCONN:
-			rfcomm_session_put(s);
+			/* When socket is closed and we are not RFCOMM
+			 * initiator rfcomm_process_rx already calls
+			 * rfcomm_session_put() */
+			if (s->sock->sk->sk_state != BT_CLOSED)
+				rfcomm_session_put(s);
 			break;
 		}
 	}

From 079b805782f94f4b278132286a8c9bc4655d1c51 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 3 Feb 2010 10:16:54 -0800
Subject: [PATCH 215/640] ocfs2: Plugs race between the dc thread and an unlock
 ast message

This patch plugs a race between the downconvert thread and an unlock ast message.
Specifically, after the downconvert worker has done its task, the dc thread needs
to check whether an unlock ast made the downconvert moot.

Reported-by: David Teigland <teigland@redhat.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@sus.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ce8e061c9a22..e044019cb3b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3384,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	int blocking;
 	int new_level;
+	int level;
 	int ret = 0;
 	int set_lvb = 0;
 	unsigned int gen;
@@ -3503,6 +3504,7 @@ recheck:
 	 * may sleep, so we save off a copy of what we're blocking as
 	 * it may change while we're not holding the spin lock. */
 	blocking = lockres->l_blocking;
+	level = lockres->l_level;
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
@@ -3511,7 +3513,7 @@ recheck:
 		goto leave;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
-	if (blocking != lockres->l_blocking) {
+	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
 		/* If this changed underneath us, then we can't drop
 		 * it just yet. */
 		goto recheck;

From cda70ba8c05a8661f882862c4699a31d215ab151 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 1 Feb 2010 17:34:58 -0800
Subject: [PATCH 216/640] ocfs2/dlm: Remove BUG_ON in dlm recovery when freeing
 locks of a dead node

During recovery, the dlm frees the locks for the dead node. If it finds a
lock in a resource for the dead node, it expects that node to also have a
ref in that lock resource. If not, it BUGs.

ossbz#1175 was filed with the above BUG. Now, while it is correct that we
should be expecting the ref, I see no reason why we have to BUG. After all,
we are freeing up the lock and clearing the ref.

This patch replaces the BUG_ON with a printk(). Hopefully, that will give
us more clues next time this happens.

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1175

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ad712211d4ea..344bcf90cbf4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2243,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
 		     "dropping ref from lockres\n", dlm->name,
 		     res->lockname.len, res->lockname.name, freed, dead_node);
-		BUG_ON(!test_bit(dead_node, res->refmap));
+		if(!test_bit(dead_node, res->refmap)) {
+			mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
+			     "but ref was not set\n", dlm->name,
+			     res->lockname.len, res->lockname.name, freed, dead_node);
+			__dlm_print_one_lock_resource(res);
+		}
 		dlm_lockres_clear_refmap_bit(dead_node, res);
 	} else if (test_bit(dead_node, res->refmap)) {
 		mlog(0, "%s:%.*s: dead node %u had a ref, but had "

From 180211b841b5bf13ab10d19202adab3eb7749f6c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 30 Jan 2010 02:53:27 +0000
Subject: [PATCH 217/640] af_key: fix netns ops ordering on module load/unload

1. After sock_register() returns, it's possible to create sockets,
   even if module still not initialized fully (blame generic module code
   for that!)
2. Consequently, pfkey_create() can be called with pfkey_net_id still not
   initialized which will BUG_ON in net_generic():
	kernel BUG at include/net/netns/generic.h:43!
3. During netns shutdown, netns ops should be unregistered after
   key manager unregistered because key manager calls can be triggered
   from xfrm_user module:

   	general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
	pfkey_broadcast+0x111/0x210 [af_key]
	pfkey_send_notify+0x16a/0x300 [af_key]
	km_state_notify+0x41/0x70
	xfrm_flush_sa+0x75/0x90 [xfrm_user]
4. Unregister netns ops after socket ops just in case and for symmetry.

Reported by Luca Tettamanti.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Tested-by: Luca Tettamanti <kronos.it@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 76fa6fef6473..539f43bc97db 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3794,9 +3794,9 @@ static struct pernet_operations pfkey_net_ops = {
 
 static void __exit ipsec_pfkey_exit(void)
 {
-	unregister_pernet_subsys(&pfkey_net_ops);
 	xfrm_unregister_km(&pfkeyv2_mgr);
 	sock_unregister(PF_KEY);
+	unregister_pernet_subsys(&pfkey_net_ops);
 	proto_unregister(&key_proto);
 }
 
@@ -3807,21 +3807,22 @@ static int __init ipsec_pfkey_init(void)
 	if (err != 0)
 		goto out;
 
-	err = sock_register(&pfkey_family_ops);
+	err = register_pernet_subsys(&pfkey_net_ops);
 	if (err != 0)
 		goto out_unregister_key_proto;
+	err = sock_register(&pfkey_family_ops);
+	if (err != 0)
+		goto out_unregister_pernet;
 	err = xfrm_register_km(&pfkeyv2_mgr);
 	if (err != 0)
 		goto out_sock_unregister;
-	err = register_pernet_subsys(&pfkey_net_ops);
-	if (err != 0)
-		goto out_xfrm_unregister_km;
 out:
 	return err;
-out_xfrm_unregister_km:
-	xfrm_unregister_km(&pfkeyv2_mgr);
+
 out_sock_unregister:
 	sock_unregister(PF_KEY);
+out_unregister_pernet:
+	unregister_pernet_subsys(&pfkey_net_ops);
 out_unregister_key_proto:
 	proto_unregister(&key_proto);
 	goto out;

From 974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 30 Jan 2010 10:05:05 +0000
Subject: [PATCH 218/640] netlink: fix for too early rmmod

Netlink code does module autoload if protocol userspace is asking for is
not ready. However, module can dissapear right after it was autoloaded.
Example: modprobe/rmmod stress-testing and xfrm_user.ko providing NETLINK_XFRM.

netlink_create() in such situation _will_ create userspace socket and
_will_not_ pin module. Now if module was removed and we're going to call
->netlink_rcv into nothing:

BUG: unable to handle kernel paging request at ffffffffa02f842a
					       ^^^^^^^^^^^^^^^^
	modules are loaded near these addresses here

IP: [<ffffffffa02f842a>] 0xffffffffa02f842a
PGD 161f067 PUD 1623063 PMD baa12067 PTE 0
Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent
CPU 1
Pid: 11515, comm: ip Not tainted 2.6.33-rc5-netns-00594-gaaa5728-dirty #6 P5E/P5E
RIP: 0010:[<ffffffffa02f842a>]  [<ffffffffa02f842a>] 0xffffffffa02f842a
RSP: 0018:ffff8800baa3db48  EFLAGS: 00010292
RAX: ffff8800baa3dfd8 RBX: ffff8800be353640 RCX: 0000000000000000
RDX: ffffffff81959380 RSI: ffff8800bab7f130 RDI: 0000000000000001
RBP: ffff8800baa3db58 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000011
R13: ffff8800be353640 R14: ffff8800bcdec240 R15: ffff8800bd488010
FS:  00007f93749656f0(0000) GS:ffff880002300000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: ffffffffa02f842a CR3: 00000000ba82b000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process ip (pid: 11515, threadinfo ffff8800baa3c000, task ffff8800bab7eb30)
Stack:
 ffffffff813637c0 ffff8800bd488000 ffff8800baa3dba8 ffffffff8136397d
<0> 0000000000000000 ffffffff81344adc 7fffffffffffffff 0000000000000000
<0> ffff8800baa3ded8 ffff8800be353640 ffff8800bcdec240 0000000000000000
Call Trace:
 [<ffffffff813637c0>] ? netlink_unicast+0x100/0x2d0
 [<ffffffff8136397d>] netlink_unicast+0x2bd/0x2d0

	netlink_unicast_kernel:
		nlk->netlink_rcv(skb);

 [<ffffffff81344adc>] ? memcpy_fromiovec+0x6c/0x90
 [<ffffffff81364263>] netlink_sendmsg+0x1d3/0x2d0
 [<ffffffff8133975b>] sock_sendmsg+0xbb/0xf0
 [<ffffffff8106cdeb>] ? __lock_acquire+0x27b/0xa60
 [<ffffffff810a18c3>] ? might_fault+0x73/0xd0
 [<ffffffff810a18c3>] ? might_fault+0x73/0xd0
 [<ffffffff8106db22>] ? __lock_release+0x82/0x170
 [<ffffffff810a190e>] ? might_fault+0xbe/0xd0
 [<ffffffff810a18c3>] ? might_fault+0x73/0xd0
 [<ffffffff81344c77>] ? verify_iovec+0x47/0xd0
 [<ffffffff8133a509>] sys_sendmsg+0x1a9/0x360
 [<ffffffff813c2be5>] ? _raw_spin_unlock_irqrestore+0x65/0x70
 [<ffffffff8106aced>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff813c2bc2>] ? _raw_spin_unlock_irqrestore+0x42/0x70
 [<ffffffff81197004>] ? __up_read+0x84/0xb0
 [<ffffffff8106ac95>] ? trace_hardirqs_on_caller+0x145/0x190
 [<ffffffff813c207f>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff8100262b>] system_call_fastpath+0x16/0x1b
Code:  Bad RIP value.
RIP  [<ffffffffa02f842a>] 0xffffffffa02f842a
 RSP <ffff8800baa3db48>
CR2: ffffffffa02f842a

If module was quickly removed after autoloading, return -E.

Return -EPROTONOSUPPORT if module was quickly removed after autoloading.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a4957bf2ca60..4c5972ba8c78 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -455,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	if (nl_table[protocol].registered &&
 	    try_module_get(nl_table[protocol].module))
 		module = nl_table[protocol].module;
+	else
+		err = -EPROTONOSUPPORT;
 	cb_mutex = nl_table[protocol].cb_mutex;
 	netlink_unlock_table();
 
+	if (err < 0)
+		goto out;
+
 	err = __netlink_create(net, sock, cb_mutex, protocol);
 	if (err < 0)
 		goto out_module;

From f6815077e75c5b7f55b56fc3788e328514d4e72a Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 1 Feb 2010 13:41:47 +0000
Subject: [PATCH 219/640] sky2: fix transmit DMA map leakage

The book keeping structure for transmit always had the flags value
cleared so transmit DMA maps were never released correctly.
Based on patch by Jarek Poplawski, problem observed by Michael Breuer.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sky2.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index d760650c5c04..67249c3c9f50 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1025,11 +1025,8 @@ static void sky2_prefetch_init(struct sky2_hw *hw, u32 qaddr,
 static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2, u16 *slot)
 {
 	struct sky2_tx_le *le = sky2->tx_le + *slot;
-	struct tx_ring_info *re = sky2->tx_ring + *slot;
 
 	*slot = RING_NEXT(*slot, sky2->tx_ring_size);
-	re->flags = 0;
-	re->skb = NULL;
 	le->ctrl = 0;
 	return le;
 }
@@ -1622,8 +1619,7 @@ static unsigned tx_le_req(const struct sk_buff *skb)
 	return count;
 }
 
-static void sky2_tx_unmap(struct pci_dev *pdev,
-			  const struct tx_ring_info *re)
+static void sky2_tx_unmap(struct pci_dev *pdev, struct tx_ring_info *re)
 {
 	if (re->flags & TX_MAP_SINGLE)
 		pci_unmap_single(pdev, pci_unmap_addr(re, mapaddr),
@@ -1633,6 +1629,7 @@ static void sky2_tx_unmap(struct pci_dev *pdev,
 		pci_unmap_page(pdev, pci_unmap_addr(re, mapaddr),
 			       pci_unmap_len(re, maplen),
 			       PCI_DMA_TODEVICE);
+	re->flags = 0;
 }
 
 /*
@@ -1839,6 +1836,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += skb->len;
 
+			re->skb = NULL;
 			dev_kfree_skb_any(skb);
 
 			sky2->tx_next = RING_NEXT(idx, sky2->tx_ring_size);

From 8ed030dd0aa400d18c63861c2c6deb7c38f4edde Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 1 Feb 2010 02:12:19 +0000
Subject: [PATCH 220/640] dccp: fix bug in cache allocation

This fixes a bug introduced in commit de4ef86cfce60d2250111f34f8a084e769f23b16
("dccp: fix dccp rmmod when kernel configured to use slub", 17 Jan): the
vsnprintf used sizeof(slab_name_fmt), which became truncated to 4 bytes, since
slab_name_fmt is now a 4-byte pointer and no longer a 32-character array.

This lead to error messages such as
 FATAL: Error inserting dccp: No buffer space available

 >> kernel: [ 1456.341501] kmem_cache_create: duplicate cache cci
generated due to the truncation after the 3rd character.

Fixed for the moment by introducing a symbolic constant. Tested to fix the bug.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccid.c | 2 +-
 net/dccp/ccid.h | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 57dfb9c8c4f2..ff16e9df1969 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -83,7 +83,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args);
+	vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args);
 	va_end(args);
 
 	slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0,
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 269958bf7fe9..6df6f8ac9636 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -19,7 +19,9 @@
 #include <linux/list.h>
 #include <linux/module.h>
 
-#define CCID_MAX 255
+/* maximum value for a CCID (RFC 4340, 19.5) */
+#define CCID_MAX		255
+#define CCID_SLAB_NAME_LENGTH	32
 
 struct tcp_info;
 
@@ -49,8 +51,8 @@ struct ccid_operations {
 	const char		*ccid_name;
 	struct kmem_cache	*ccid_hc_rx_slab,
 				*ccid_hc_tx_slab;
-	char			ccid_hc_rx_slab_name[32];
-	char			ccid_hc_tx_slab_name[32];
+	char			ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH];
+	char			ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH];
 	__u32			ccid_hc_rx_obj_size,
 				ccid_hc_tx_obj_size;
 	/* Interface Routines */

From 1386be55e32a3c5d8ef4a2b243c530a7b664c02c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 2 Feb 2010 20:16:56 +0000
Subject: [PATCH 221/640] dccp: fix auto-loading of dccp(_probe)

This fixes commit (38ff3e6bb987ec583268da8eb22628293095d43b) ("dccp_probe:
Fix module load dependencies between dccp and dccp_probe", from 15 Jan).

It fixes the construction of the first argument of try_then_request_module(),
where only valid return codes from the first argument should be returned.

What we do now is assign the result of register_jprobe() to ret, without
the side effect of the comparison.

Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/probe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index bace1d8cbcfd..f5b3464f1242 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -161,8 +161,8 @@ static __init int dccpprobe_init(void)
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
 
-	ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0),
-					"dccp");
+	try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0,
+				"dccp");
 	if (ret)
 		goto err1;
 

From 88d1a0cf659438a66135661538ae332b23f8635a Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Thu, 4 Feb 2010 09:55:51 +0900
Subject: [PATCH 222/640] Bluetooth: Fix memory leak in Marvell BT-over-SDIO
 driver

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmrvl_sdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index f36defa37764..57d965b7f521 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -808,6 +808,7 @@ static int btmrvl_sdio_host_to_card(struct btmrvl_private *priv,
 
 exit:
 	sdio_release_host(card->func);
+	kfree(tmpbuf);
 
 	return ret;
 }

From c390216b3e868b16d8154939f4b6f8c16dbd9a9f Mon Sep 17 00:00:00 2001
From: Nick Pelly <npelly@google.com>
Date: Fri, 13 Nov 2009 14:16:32 -0800
Subject: [PATCH 223/640] Bluetooth: Enter active mode before establishing a
 SCO link.

When in sniff mode with a long interval time (1.28s) it can take 4+ seconds
to establish a SCO link. Fix by requesting active mode before requesting
SCO connection. This improves SCO setup time to ~500ms.

Bluetooth headsets that use a long interval time, and exhibit the long
SCO connection time include Motorola H790, HX1 and H17. They have a
CSR 2.1 chipset.

Verified this behavior and fix with host Bluetooth chipsets: BCM4329 and
TI1271.

2009-10-13 14:17:46.183722 > HCI Event: Mode Change (0x14) plen 6
    status 0x00 handle 1 mode 0x02 interval 2048
    Mode: Sniff
2009-10-13 14:17:53.436285 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17
    handle 1 voice setting 0x0060
2009-10-13 14:17:53.445593 > HCI Event: Command Status (0x0f) plen 4
    Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1
2009-10-13 14:17:57.788855 > HCI Event: Synchronous Connect Complete 0x2c) plen 17
    status 0x00 handle 257 bdaddr 00:1A:0E:F1:A4:7F type eSCO
    Air mode: CVSD

Signed-off-by: Nick Pelly <npelly@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b7c4224f4e7d..b10e3cdb08f8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -377,6 +377,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 
 	if (acl->state == BT_CONNECTED &&
 			(sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
+		acl->power_save = 1;
+		hci_conn_enter_active_mode(acl);
+
 		if (lmp_esco_capable(hdev))
 			hci_setup_sync(sco, acl->handle);
 		else

From 41f3e0b1f524c47908e944a7e55f11c371ddddec Mon Sep 17 00:00:00 2001
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
Date: Tue, 2 Feb 2010 04:16:20 +0000
Subject: [PATCH 224/640] netxen: fix tx timeout recovery for NX2031 chip

For NX2031, first try to scrub interrupt before requesting firmware
reset. Return statement was missing after scrubbbing interrupt.

Signed-off-by: Vernon Mauery <vernux@us.ibm.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netxen/netxen_nic_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 9f9d6081959b..5209095a8739 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -1941,7 +1941,7 @@ static void netxen_tx_timeout_task(struct work_struct *work)
 		netif_wake_queue(adapter->netdev);
 
 		clear_bit(__NX_RESETTING, &adapter->state);
-
+		return;
 	} else {
 		clear_bit(__NX_RESETTING, &adapter->state);
 		if (!netxen_nic_reset_context(adapter)) {

From e15eec2805565c7e31dbe402215637012f1e4616 Mon Sep 17 00:00:00 2001
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
Date: Tue, 2 Feb 2010 04:16:21 +0000
Subject: [PATCH 225/640] netxen: protect resource cleanup by rtnl lock

o context resources can be in used, while resource cleanup is in progress,
  during fw recover.
o Null pointer execption can occur in send_cmd_desc, if fw recovery
  module frees tx ring without rtnl lock.
o Same applies to ethtool register dump.

Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netxen/netxen_nic_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 5209095a8739..24279e6e55f5 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -2240,7 +2240,9 @@ netxen_detach_work(struct work_struct *work)
 
 	netxen_nic_down(adapter, netdev);
 
+	rtnl_lock();
 	netxen_nic_detach(adapter);
+	rtnl_unlock();
 
 	status = NXRD32(adapter, NETXEN_PEG_HALT_STATUS1);
 

From e44d38e1b72a2aa7f5e7024c5da83a879355a1cc Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 3 Feb 2010 13:12:51 +0000
Subject: [PATCH 226/640] ixgbe: Fix ixgbe_tx_map error path

Commit e5a43549f7a58509a91b299a51337d386697b92c (ixgbe: remove
skb_dma_map/unmap calls from driver) looks to have introduced a bug in
ixgbe_tx_map. If we get an error from a PCI DMA call, we loop backwards
through count until it becomes -1 and return that.

The caller of ixgbe_tx_map expects 0 on error, so return that instead.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index b5f64ad67975..37e2af0a6145 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5179,7 +5179,7 @@ dma_error:
 		ixgbe_unmap_and_free_tx_resource(adapter, tx_buffer_info);
 	}
 
-	return count;
+	return 0;
 }
 
 static void ixgbe_tx_queue(struct ixgbe_adapter *adapter,

From fdd3d631cddad20ad9d3e1eb7dbf26825a8a121f Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Wed, 3 Feb 2010 13:13:10 +0000
Subject: [PATCH 227/640] ixgbe: Fix return of invalid txq

a developer had complained of getting lots of warnings:

"eth16 selects TX queue 98, but real number of TX queues is 64"

http://www.mail-archive.com/e1000-devel@lists.sourceforge.net/msg02200.html

As there was no follow up on that bug, I am submitting this
patch assuming that the other return points will not return
invalid txq's, and also that this fixes the bug (not tested).

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 37e2af0a6145..7b7c8486c0bf 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5329,8 +5329,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb)
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	int txq = smp_processor_id();
 
-	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+		while (unlikely(txq >= dev->real_num_tx_queues))
+			txq -= dev->real_num_tx_queues;
 		return txq;
+	}
 
 #ifdef IXGBE_FCOE
 	if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) &&

From 454debe45c86102528c90c12eb6a99245b773bfe Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Mon, 1 Feb 2010 08:21:34 +0000
Subject: [PATCH 228/640] irda: unbalanced lock_kernel in irnet_ppp

Add the missing unlock_kernel in one ioctl operation.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/irnet_ppp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 156020d138b5..7c22c126f0ea 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -706,7 +706,8 @@ dev_irnet_ioctl(
       lock_kernel();
       if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
 						(int __user *)argp))
-      err = 0;
+        err = 0;
+      unlock_kernel();
       break;
 
       /* All these ioctls can be passed both directly and from ppp_generic,

From 3fdde0a1602d20c02a7d66e07ab6718ab8d79b12 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Mon, 1 Feb 2010 08:21:35 +0000
Subject: [PATCH 229/640] irda: add missing BKL in irnet_ppp ioctl

One ioctl has been forgotten when the BKL was push down into irnet_ppp
ioctl function.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/irnet_ppp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7c22c126f0ea..6b3602de359a 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -698,9 +698,11 @@ dev_irnet_ioctl(
 
       /* Query PPP channel and unit number */
     case PPPIOCGCHAN:
+      lock_kernel();
       if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
 						(int __user *)argp))
 	err = 0;
+      unlock_kernel();
       break;
     case PPPIOCGUNIT:
       lock_kernel();

From bc496ed00ab1411d3efaf295b72e0c9eb343e1a3 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Mon, 1 Feb 2010 13:11:38 -0500
Subject: [PATCH 230/640] libata-scsi passthru: fix bug which truncated LBA48
 return values

Fix assignment which overwrote SAT ATA PASS-THROUGH command EXTEND
bit setting (ATA_TFLAG_LBA48)

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index f4ea5a8c325b..d096fbcbc771 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
 	 * write indication (used for PIO/DMA setup), result TF is
 	 * copied back and we don't whine too much about its failure.
 	 */
-	tf->flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
 	if (scmd->sc_data_direction == DMA_TO_DEVICE)
 		tf->flags |= ATA_TFLAG_WRITE;
 

From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Jan 2010 13:30:11 +0100
Subject: [PATCH 231/640] libata: fix ata_id_logical_per_physical_sectors

The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not not
a plain multiple, but the log of it, so fix the helper to give the correct
answer.  Without this we'll get an incorrect minimal I/O size in the block
limits VPD page for 4k sector drives.

Also change the return value of ata_id_logical_per_physical_sectors to u16
for the unlikely case of very large logical sectors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 38a6948ce0c2..20f31567ccee 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id)
 	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
 }
 
-static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+static inline u16 ata_id_logical_per_physical_sectors(const u16 *id)
 {
-	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+	return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf);
 }
 
 static inline int ata_id_has_lba48(const u16 *id)

From cedc9bf906dae044443d403371c887affdb44168 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 28 Jan 2010 16:04:15 +0900
Subject: [PATCH 232/640] ahci: add Acer G725 to broken suspend list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Acer G725 shares the same suspend problem with the HP laptops which
lose ATA devices on resume.  New firmware which fixes the problem is
already available.  Add G725 with old firmwares to the broken suspend
list.

This problem has been reported in bko#15104.

  http://bugzilla.kernel.org/show_bug.cgi?id=15104

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jani-Matti Hätinen <jani-matti.hatinen@iki.fi>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index b8bea100a160..b34390347c16 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -2868,6 +2868,21 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
 			},
 			.driver_data = "F.23",	/* cutoff BIOS version */
 		},
+		/*
+		 * Acer eMachines G725 has the same problem.  BIOS
+		 * V1.03 is known to be broken.  V3.04 is known to
+		 * work.  Inbetween, there are V1.06, V2.06 and V3.03
+		 * that we don't have much idea about.  For now,
+		 * blacklist anything older than V3.04.
+		 */
+		{
+			.ident = "G725",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "eMachines"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "eMachines G725"),
+			},
+			.driver_data = "V3.04",	/* cutoff BIOS version */
+		},
 		{ }	/* terminate list */
 	};
 	const struct dmi_system_id *dmi = dmi_first_match(sysids);

From 2d68b7fe55d9e19a8a868224ed0dfd6526568521 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 4 Feb 2010 01:04:50 -0500
Subject: [PATCH 233/640] [libata] Call flush_dcache_page after PIO data
 transfers in libata-sff.c

flush_dcache_page() must be called after (!ATA_TFLAG_WRITE) the
data copying to avoid D-cache aliasing with user space or I-D cache
coherency issues (when reading data from an ATA device using PIO,
the kernel dirties the D-cache but there is no flush_dcache_page()
required on Harvard architectures).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 741065c9da67..730ef3c384ca 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -893,6 +893,9 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 				       do_write);
 	}
 
+	if (!do_write)
+		flush_dcache_page(page);
+
 	qc->curbytes += qc->sect_size;
 	qc->cursg_ofs += qc->sect_size;
 

From 06df6dafb5d9e3cfa3588c6ce79328b91582b6af Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 4 Feb 2010 14:43:38 +0900
Subject: [PATCH 234/640] x86/agp: Fix amd64-agp module initialization
 regression

This fixes the regression introduced by commit
42590a75019a50012f25a962246498dead428433 ("x86/agp: Fix
agp_amd64_init and agp_amd64_cleanup").

The commit 61684ceaad4f65d1a9832c722f7bd5e7fc714de9 fixed the
above regression but it's not enough. When amd64-agp is built as
a module, AGP isn't initialized, iommu is initialized, all the
aperture is owned by the iommu.

Reported-by: Marin Mitov <mitov@issp.bas.bg>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Tested-by: Marin Mitov <mitov@issp.bas.bg>
LKML-Reference: <20100204090802S.fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/char/agp/amd64-agp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 34cf04e21795..fd50ead59c79 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -767,16 +767,19 @@ int __init agp_amd64_init(void)
 
 static int __init agp_amd64_mod_init(void)
 {
+#ifndef MODULE
 	if (gart_iommu_aperture)
 		return agp_bridges_found ? 0 : -ENODEV;
-
+#endif
 	return agp_amd64_init();
 }
 
 static void __exit agp_amd64_cleanup(void)
 {
+#ifndef MODULE
 	if (gart_iommu_aperture)
 		return;
+#endif
 	if (aperture_resource)
 		release_resource(aperture_resource);
 	pci_unregister_driver(&agp_amd64_pci_driver);

From 9de89fe7c577847877ae00ea1aa6315559b10243 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:00 -0200
Subject: [PATCH 235/640] perf symbols: Remove perf_session usage in symbols
 layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I noticed while writing the first test in 'perf regtest' that to
just test the symbol handling routines one needs to create a
perf session, that is a layer centered on a perf.data file,
events, etc, so I untied these layers.

This reduces the complexity for the users as the number of
parameters to most of the symbols and session APIs now was
reduced while not adding more state to all the map instances by
only having data that is needed to split the kernel (kallsyms
and ELF symtab sections) maps and do vmlinux relocation on the
main kernel map.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-kmem.c  |   2 +-
 tools/perf/builtin-probe.c |   5 +-
 tools/perf/util/event.c    |   6 +-
 tools/perf/util/map.c      |  20 +++---
 tools/perf/util/map.h      |  22 +++++--
 tools/perf/util/session.c  |  35 +++++++----
 tools/perf/util/session.h  |  22 ++++---
 tools/perf/util/symbol.c   | 122 +++++++++++++++++--------------------
 tools/perf/util/symbol.h   |  19 +++---
 tools/perf/util/thread.c   |   3 +-
 tools/perf/util/thread.h   |  14 +++--
 11 files changed, 149 insertions(+), 121 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 5d5dc6b09617..924a9518931a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -369,7 +369,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = map_groups__find_function(&session->kmaps, session, addr, NULL);
+				sym = map_groups__find_function(&session->kmaps, addr, NULL);
 		} else
 			addr = data->ptr;
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 34f2acb1cc88..4fa73eca1d82 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -122,8 +122,7 @@ static int opt_del_probe_event(const struct option *opt __used,
 static void evaluate_probe_point(struct probe_point *pp)
 {
 	struct symbol *sym;
-	sym = map__find_symbol_by_name(session.kmap, pp->function,
-				       session.psession, NULL);
+	sym = map__find_symbol_by_name(session.kmap, pp->function, NULL);
 	if (!sym)
 		die("Kernel symbol \'%s\' not found - probe not added.",
 		    pp->function);
@@ -132,7 +131,7 @@ static void evaluate_probe_point(struct probe_point *pp)
 #ifndef NO_LIBDWARF
 static int open_vmlinux(void)
 {
-	if (map__load(session.kmap, session.psession, NULL) < 0) {
+	if (map__load(session.kmap, NULL) < 0) {
 		pr_debug("Failed to load kernel map.\n");
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bbaee61c1683..c3831f633dec 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -374,9 +374,7 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 				goto out_problem;
 
 			kernel->kernel = 1;
-			if (__map_groups__create_kernel_maps(&session->kmaps,
-							     session->vmlinux_maps,
-							     kernel) < 0)
+			if (__perf_session__create_kernel_maps(session, kernel) < 0)
 				goto out_problem;
 
 			session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
@@ -476,7 +474,7 @@ void thread__find_addr_location(struct thread *self,
 {
 	thread__find_addr_map(self, session, cpumode, type, addr, al);
 	if (al->map != NULL)
-		al->sym = map__find_symbol(al->map, session, al->addr, filter);
+		al->sym = map__find_symbol(al->map, al->addr, filter);
 	else
 		al->sym = NULL;
 }
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c4d55a0da2ea..36ff0bf0315d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -104,8 +104,7 @@ void map__fixup_end(struct map *self)
 
 #define DSO__DELETED "(deleted)"
 
-int map__load(struct map *self, struct perf_session *session,
-	      symbol_filter_t filter)
+int map__load(struct map *self, symbol_filter_t filter)
 {
 	const char *name = self->dso->long_name;
 	int nr;
@@ -113,7 +112,7 @@ int map__load(struct map *self, struct perf_session *session,
 	if (dso__loaded(self->dso, self->type))
 		return 0;
 
-	nr = dso__load(self->dso, self, session, filter);
+	nr = dso__load(self->dso, self, filter);
 	if (nr < 0) {
 		if (self->dso->has_build_id) {
 			char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -144,24 +143,29 @@ int map__load(struct map *self, struct perf_session *session,
 
 		return -1;
 	}
+	/*
+	 * Only applies to the kernel, as its symtabs aren't relative like the
+	 * module ones.
+	 */
+	if (self->dso->kernel)
+		map__reloc_vmlinux(self);
 
 	return 0;
 }
 
-struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
-				u64 addr, symbol_filter_t filter)
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+				symbol_filter_t filter)
 {
-	if (map__load(self, session, filter) < 0)
+	if (map__load(self, filter) < 0)
 		return NULL;
 
 	return dso__find_symbol(self->dso, self->type, addr);
 }
 
 struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
-					struct perf_session *session,
 					symbol_filter_t filter)
 {
-	if (map__load(self, session, filter) < 0)
+	if (map__load(self, filter) < 0)
 		return NULL;
 
 	if (!dso__sorted_by_name(self->dso, self->type))
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 72f0b6ab5ea5..de048399d776 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -14,6 +14,8 @@ enum map_type {
 #define MAP__NR_TYPES (MAP__VARIABLE + 1)
 
 struct dso;
+struct ref_reloc_sym;
+struct map_groups;
 
 struct map {
 	union {
@@ -29,6 +31,16 @@ struct map {
 	struct dso		*dso;
 };
 
+struct kmap {
+	struct ref_reloc_sym	*ref_reloc_sym;
+	struct map_groups	*kmaps;
+};
+
+static inline struct kmap *map__kmap(struct map *self)
+{
+	return (struct kmap *)(self + 1);
+}
+
 static inline u64 map__map_ip(struct map *map, u64 ip)
 {
 	return ip - map->start + map->pgoff;
@@ -58,16 +70,14 @@ struct map *map__clone(struct map *self);
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *self, FILE *fp);
 
-struct perf_session;
-
-int map__load(struct map *self, struct perf_session *session,
-	      symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
+int map__load(struct map *self, symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self,
 				u64 addr, symbol_filter_t filter);
 struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
-					struct perf_session *session,
 					symbol_filter_t filter);
 void map__fixup_start(struct map *self);
 void map__fixup_end(struct map *self);
 
+void map__reloc_vmlinux(struct map *self);
+
 #endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index cf91d099f0aa..aa8a03120bbd 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -53,6 +53,11 @@ out_close:
 	return -1;
 }
 
+static inline int perf_session__create_kernel_maps(struct perf_session *self)
+{
+	return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps);
+}
+
 struct perf_session *perf_session__new(const char *filename, int mode, bool force)
 {
 	size_t len = filename ? strlen(filename) + 1 : 0;
@@ -507,6 +512,7 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
 					     u64 addr)
 {
 	char *bracket;
+	enum map_type i;
 
 	self->ref_reloc_sym.name = strdup(symbol_name);
 	if (self->ref_reloc_sym.name == NULL)
@@ -517,6 +523,12 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
 		*bracket = '\0';
 
 	self->ref_reloc_sym.addr = addr;
+
+	for (i = 0; i < MAP__NR_TYPES; ++i) {
+		struct kmap *kmap = map__kmap(self->vmlinux_maps[i]);
+		kmap->ref_reloc_sym = &self->ref_reloc_sym;
+	}
+
 	return 0;
 }
 
@@ -530,20 +542,21 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
 	return ip - (s64)map->pgoff;
 }
 
-void perf_session__reloc_vmlinux_maps(struct perf_session *self,
-				      u64 unrelocated_addr)
+void map__reloc_vmlinux(struct map *self)
 {
-	enum map_type type;
-	s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr;
+	struct kmap *kmap = map__kmap(self);
+	s64 reloc;
+
+	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
+		return;
+
+	reloc = (kmap->ref_reloc_sym->unrelocated_addr -
+		 kmap->ref_reloc_sym->addr);
 
 	if (!reloc)
 		return;
 
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct map *map = self->vmlinux_maps[type];
-
-		map->map_ip = map__reloc_map_ip;
-		map->unmap_ip = map__reloc_unmap_ip;
-		map->pgoff = reloc;
-	}
+	self->map_ip   = map__reloc_map_ip;
+	self->unmap_ip = map__reloc_unmap_ip;
+	self->pgoff    = reloc;
 }
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 36d1a80c0b6c..752d75aebade 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -3,13 +3,13 @@
 
 #include "event.h"
 #include "header.h"
+#include "symbol.h"
 #include "thread.h"
 #include <linux/rbtree.h>
 #include "../../../include/linux/perf_event.h"
 
 struct ip_callchain;
 struct thread;
-struct symbol;
 
 struct perf_session {
 	struct perf_header	header;
@@ -24,10 +24,7 @@ struct perf_session {
 	unsigned long		unknown_events;
 	struct rb_root		hists;
 	u64			sample_type;
-	struct {
-		const char	*name;
-		u64		addr;
-	}			ref_reloc_sym;
+	struct ref_reloc_sym	ref_reloc_sym;
 	int			fd;
 	int			cwdlen;
 	char			*cwd;
@@ -69,9 +66,20 @@ int perf_header__read_build_ids(struct perf_header *self, int input,
 int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
 					     const char *symbol_name,
 					     u64 addr);
-void perf_session__reloc_vmlinux_maps(struct perf_session *self,
-				      u64 unrelocated_addr);
 
 void mem_bswap_64(void *src, int byte_size);
 
+static inline int __perf_session__create_kernel_maps(struct perf_session *self,
+						struct dso *kernel)
+{
+	return __map_groups__create_kernel_maps(&self->kmaps,
+						self->vmlinux_maps, kernel);
+}
+
+static inline struct map *
+	perf_session__new_module_map(struct perf_session *self,
+				     u64 start, const char *filename)
+{
+	return map_groups__new_module(&self->kmaps, start, filename);
+}
 #endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f9049d12ead6..613874260761 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,6 +1,5 @@
 #include "util.h"
 #include "../perf.h"
-#include "session.h"
 #include "sort.h"
 #include "string.h"
 #include "symbol.h"
@@ -34,7 +33,7 @@ enum dso_origin {
 static void dsos__add(struct list_head *head, struct dso *dso);
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
-				struct perf_session *session, symbol_filter_t filter);
+				symbol_filter_t filter);
 static int vmlinux_path__nr_entries;
 static char **vmlinux_path;
 
@@ -480,8 +479,9 @@ static int dso__load_all_kallsyms(struct dso *self, const char *filename,
  * the original ELF section names vmlinux have.
  */
 static int dso__split_kallsyms(struct dso *self, struct map *map,
-			       struct perf_session *session, symbol_filter_t filter)
+			       symbol_filter_t filter)
 {
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
 	struct map *curr_map = map;
 	struct symbol *pos;
 	int count = 0;
@@ -503,7 +503,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 			*module++ = '\0';
 
 			if (strcmp(curr_map->dso->short_name, module)) {
-				curr_map = map_groups__find_by_name(&session->kmaps, map->type, module);
+				curr_map = map_groups__find_by_name(kmaps, map->type, module);
 				if (curr_map == NULL) {
 					pr_debug("/proc/{kallsyms,modules} "
 					         "inconsistency while looking "
@@ -538,7 +538,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 			}
 
 			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
-			map_groups__insert(&session->kmaps, curr_map);
+			map_groups__insert(kmaps, curr_map);
 			++kernel_range;
 		}
 
@@ -557,9 +557,8 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 	return count;
 }
 
-
-static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
-			      struct perf_session *session, symbol_filter_t filter)
+int dso__load_kallsyms(struct dso *self, const char *filename,
+		       struct map *map, symbol_filter_t filter)
 {
 	if (dso__load_all_kallsyms(self, filename, map) < 0)
 		return -1;
@@ -567,7 +566,7 @@ static int dso__load_kallsyms(struct dso *self, const char *filename, struct map
 	symbols__fixup_end(&self->symbols[map->type]);
 	self->origin = DSO__ORIG_KERNEL;
 
-	return dso__split_kallsyms(self, map, session, filter);
+	return dso__split_kallsyms(self, map, filter);
 }
 
 static int dso__load_perf_map(struct dso *self, struct map *map,
@@ -893,10 +892,10 @@ static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type
 	}
 }
 
-static int dso__load_sym(struct dso *self, struct map *map,
-			 struct perf_session *session, const char *name, int fd,
-			 symbol_filter_t filter, int kernel, int kmodule)
+static int dso__load_sym(struct dso *self, struct map *map, const char *name,
+			 int fd, symbol_filter_t filter, int kmodule)
 {
+	struct kmap *kmap = self->kernel ? map__kmap(map) : NULL;
 	struct map *curr_map = map;
 	struct dso *curr_dso = self;
 	size_t dso_name_len = strlen(self->short_name);
@@ -953,7 +952,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	memset(&sym, 0, sizeof(sym));
-	if (!kernel) {
+	if (!self->kernel) {
 		self->adjust_symbols = (ehdr.e_type == ET_EXEC ||
 				elf_section_by_name(elf, &ehdr, &shdr,
 						     ".gnu.prelink_undo",
@@ -967,9 +966,9 @@ static int dso__load_sym(struct dso *self, struct map *map,
 		int is_label = elf_sym__is_label(&sym);
 		const char *section_name;
 
-		if (kernel && session->ref_reloc_sym.name != NULL &&
-		    strcmp(elf_name, session->ref_reloc_sym.name) == 0)
-			perf_session__reloc_vmlinux_maps(session, sym.st_value);
+		if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+		    strcmp(elf_name, kmap->ref_reloc_sym->name) == 0)
+			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
 
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
@@ -985,7 +984,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 
 		section_name = elf_sec__name(&shdr, secstrs);
 
-		if (kernel || kmodule) {
+		if (self->kernel || kmodule) {
 			char dso_name[PATH_MAX];
 
 			if (strcmp(section_name,
@@ -1001,7 +1000,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 			snprintf(dso_name, sizeof(dso_name),
 				 "%s%s", self->short_name, section_name);
 
-			curr_map = map_groups__find_by_name(&session->kmaps, map->type, dso_name);
+			curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name);
 			if (curr_map == NULL) {
 				u64 start = sym.st_value;
 
@@ -1020,7 +1019,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 				curr_map->map_ip = identity__map_ip;
 				curr_map->unmap_ip = identity__map_ip;
 				curr_dso->origin = DSO__ORIG_KERNEL;
-				map_groups__insert(&session->kmaps, curr_map);
+				map_groups__insert(kmap->kmaps, curr_map);
 				dsos__add(&dsos__kernel, curr_dso);
 			} else
 				curr_dso = curr_map->dso;
@@ -1236,8 +1235,7 @@ char dso__symtab_origin(const struct dso *self)
 	return origin[self->origin];
 }
 
-int dso__load(struct dso *self, struct map *map, struct perf_session *session,
-	      symbol_filter_t filter)
+int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 {
 	int size = PATH_MAX;
 	char *name;
@@ -1249,7 +1247,7 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
 	dso__set_loaded(self, map->type);
 
 	if (self->kernel)
-		return dso__load_kernel_sym(self, map, session, filter);
+		return dso__load_kernel_sym(self, map, filter);
 
 	name = malloc(size);
 	if (!name)
@@ -1320,7 +1318,7 @@ open_file:
 		fd = open(name, O_RDONLY);
 	} while (fd < 0);
 
-	ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0);
+	ret = dso__load_sym(self, map, name, fd, filter, 0);
 	close(fd);
 
 	/*
@@ -1376,7 +1374,7 @@ static int dso__kernel_module_get_build_id(struct dso *self)
 	return 0;
 }
 
-static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname)
+static int map_groups__set_modules_path_dir(struct map_groups *self, char *dirname)
 {
 	struct dirent *dent;
 	DIR *dir = opendir(dirname);
@@ -1396,7 +1394,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
 
 			snprintf(path, sizeof(path), "%s/%s",
 				 dirname, dent->d_name);
-			if (perf_session__set_modules_path_dir(self, path) < 0)
+			if (map_groups__set_modules_path_dir(self, path) < 0)
 				goto failure;
 		} else {
 			char *dot = strrchr(dent->d_name, '.'),
@@ -1410,7 +1408,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
 				 (int)(dot - dent->d_name), dent->d_name);
 
 			strxfrchar(dso_name, '-', '_');
-			map = map_groups__find_by_name(&self->kmaps, MAP__FUNCTION, dso_name);
+			map = map_groups__find_by_name(self, MAP__FUNCTION, dso_name);
 			if (map == NULL)
 				continue;
 
@@ -1431,7 +1429,7 @@ failure:
 	return -1;
 }
 
-static int perf_session__set_modules_path(struct perf_session *self)
+static int map_groups__set_modules_path(struct map_groups *self)
 {
 	struct utsname uts;
 	char modules_path[PATH_MAX];
@@ -1442,7 +1440,7 @@ static int perf_session__set_modules_path(struct perf_session *self)
 	snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
 		 uts.release);
 
-	return perf_session__set_modules_path_dir(self, modules_path);
+	return map_groups__set_modules_path_dir(self, modules_path);
 }
 
 /*
@@ -1452,8 +1450,8 @@ static int perf_session__set_modules_path(struct perf_session *self)
  */
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 {
-	struct map *self = malloc(sizeof(*self));
-
+	struct map *self = zalloc(sizeof(*self) +
+				  (dso->kernel ? sizeof(struct kmap) : 0));
 	if (self != NULL) {
 		/*
 		 * ->end will be filled after we load all the symbols
@@ -1464,8 +1462,8 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return self;
 }
 
-struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
-					 const char *filename)
+struct map *map_groups__new_module(struct map_groups *self, u64 start,
+				   const char *filename)
 {
 	struct map *map;
 	struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
@@ -1478,11 +1476,11 @@ struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
 		return NULL;
 
 	dso->origin = DSO__ORIG_KMODULE;
-	map_groups__insert(&self->kmaps, map);
+	map_groups__insert(self, map);
 	return map;
 }
 
-static int perf_session__create_module_maps(struct perf_session *self)
+static int map_groups__create_modules(struct map_groups *self)
 {
 	char *line = NULL;
 	size_t n;
@@ -1520,7 +1518,7 @@ static int perf_session__create_module_maps(struct perf_session *self)
 		*sep = '\0';
 
 		snprintf(name, sizeof(name), "[%s]", line);
-		map = perf_session__new_module_map(self, start, name);
+		map = map_groups__new_module(self, start, name);
 		if (map == NULL)
 			goto out_delete_line;
 		dso__kernel_module_get_build_id(map->dso);
@@ -1529,7 +1527,7 @@ static int perf_session__create_module_maps(struct perf_session *self)
 	free(line);
 	fclose(file);
 
-	return perf_session__set_modules_path(self);
+	return map_groups__set_modules_path(self);
 
 out_delete_line:
 	free(line);
@@ -1538,7 +1536,6 @@ out_failure:
 }
 
 static int dso__load_vmlinux(struct dso *self, struct map *map,
-			     struct perf_session *session,
 			     const char *vmlinux, symbol_filter_t filter)
 {
 	int err = -1, fd;
@@ -1572,14 +1569,14 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
 		return -1;
 
 	dso__set_loaded(self, map->type);
-	err = dso__load_sym(self, map, session, vmlinux, fd, filter, 1, 0);
+	err = dso__load_sym(self, map, vmlinux, fd, filter, 0);
 	close(fd);
 
 	return err;
 }
 
 int dso__load_vmlinux_path(struct dso *self, struct map *map,
-			   struct perf_session *session, symbol_filter_t filter)
+			   symbol_filter_t filter)
 {
 	int i, err = 0;
 
@@ -1587,8 +1584,7 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map,
 		 vmlinux_path__nr_entries);
 
 	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
-		err = dso__load_vmlinux(self, map, session, vmlinux_path[i],
-					filter);
+		err = dso__load_vmlinux(self, map, vmlinux_path[i], filter);
 		if (err > 0) {
 			pr_debug("Using %s for symbols\n", vmlinux_path[i]);
 			dso__set_long_name(self, strdup(vmlinux_path[i]));
@@ -1600,7 +1596,7 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map,
 }
 
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
-				struct perf_session *session, symbol_filter_t filter)
+				symbol_filter_t filter)
 {
 	int err;
 	const char *kallsyms_filename = NULL;
@@ -1621,13 +1617,13 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	 * match.
 	 */
 	if (symbol_conf.vmlinux_name != NULL) {
-		err = dso__load_vmlinux(self, map, session,
+		err = dso__load_vmlinux(self, map,
 					symbol_conf.vmlinux_name, filter);
 		goto out_try_fixup;
 	}
 
 	if (vmlinux_path != NULL) {
-		err = dso__load_vmlinux_path(self, map, session, filter);
+		err = dso__load_vmlinux_path(self, map, filter);
 		if (err > 0)
 			goto out_fixup;
 	}
@@ -1675,7 +1671,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	}
 
 do_kallsyms:
-	err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
+	err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
 	free(kallsyms_allocated_filename);
 
 out_try_fixup:
@@ -1812,30 +1808,23 @@ int __map_groups__create_kernel_maps(struct map_groups *self,
 	enum map_type type;
 
 	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap;
+
 		vmlinux_maps[type] = map__new2(0, kernel, type);
 		if (vmlinux_maps[type] == NULL)
 			return -1;
 
 		vmlinux_maps[type]->map_ip =
 			vmlinux_maps[type]->unmap_ip = identity__map_ip;
+
+		kmap = map__kmap(vmlinux_maps[type]);
+		kmap->kmaps = self;
 		map_groups__insert(self, vmlinux_maps[type]);
 	}
 
 	return 0;
 }
 
-static int map_groups__create_kernel_maps(struct map_groups *self,
-					  struct map *vmlinux_maps[MAP__NR_TYPES],
-					  const char *vmlinux)
-{
-	struct dso *kernel = dsos__create_kernel(vmlinux);
-
-	if (kernel == NULL)
-		return -1;
-
-	return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel);
-}
-
 static void vmlinux_path__exit(void)
 {
 	while (--vmlinux_path__nr_entries >= 0) {
@@ -1941,19 +1930,22 @@ out_free_comm_list:
 	return -1;
 }
 
-int perf_session__create_kernel_maps(struct perf_session *self)
+int map_groups__create_kernel_maps(struct map_groups *self,
+				   struct map *vmlinux_maps[MAP__NR_TYPES])
 {
-	if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps,
-					   symbol_conf.vmlinux_name) < 0)
+	struct dso *kernel = dsos__create_kernel(symbol_conf.vmlinux_name);
+
+	if (kernel == NULL)
 		return -1;
 
-	if (symbol_conf.use_modules &&
-	    perf_session__create_module_maps(self) < 0)
-		pr_debug("Failed to load list of modules for session %s, "
-			 "continuing...\n", self->filename);
+	if (__map_groups__create_kernel_maps(self, vmlinux_maps, kernel) < 0)
+		return -1;
+
+	if (symbol_conf.use_modules && map_groups__create_modules(self) < 0)
+		return -1;
 	/*
 	 * Now that we have all the maps created, just set the ->end of them:
 	 */
-	map_groups__fixup_end(&self->kmaps);
+	map_groups__fixup_end(self);
 	return 0;
 }
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 124302778c09..e6a59e5c2bea 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -80,6 +80,12 @@ static inline void *symbol__priv(struct symbol *self)
 	return ((void *)self) - symbol_conf.priv_size;
 }
 
+struct ref_reloc_sym {
+	const char	*name;
+	u64		addr;
+	u64		unrelocated_addr;
+};
+
 struct addr_location {
 	struct thread *thread;
 	struct map    *map;
@@ -126,12 +132,11 @@ static inline struct dso *dsos__findnew(const char *name)
 	return __dsos__findnew(&dsos__user, name);
 }
 
-struct perf_session;
-
-int dso__load(struct dso *self, struct map *map, struct perf_session *session,
-	      symbol_filter_t filter);
+int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *self, struct map *map,
-			   struct perf_session *session, symbol_filter_t filter);
+			   symbol_filter_t filter);
+int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
+		       symbol_filter_t filter);
 void dsos__fprintf(FILE *fp);
 size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
 
@@ -156,9 +161,5 @@ int kallsyms__parse(const char *filename, void *arg,
 int symbol__init(void);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
-int perf_session__create_kernel_maps(struct perf_session *self);
-
-struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
-					 const char *filename);
 extern struct dso *vdso;
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 4a08dcf50b68..634b7f7140d5 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -282,14 +282,13 @@ size_t perf_session__fprintf(struct perf_session *self, FILE *fp)
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *self,
-				       struct perf_session *session,
 				       enum map_type type, u64 addr,
 				       symbol_filter_t filter)
 {
 	struct map *map = map_groups__find(self, type, addr);
 
 	if (map != NULL)
-		return map__find_symbol(map, session, map->map_ip(map, addr), filter);
+		return map__find_symbol(map, map->map_ip(map, addr), filter);
 
 	return NULL;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e35653c1817c..56f317b8a06c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -59,15 +59,14 @@ void thread__find_addr_location(struct thread *self,
 				struct addr_location *al,
 				symbol_filter_t filter);
 struct symbol *map_groups__find_symbol(struct map_groups *self,
-				       struct perf_session *session,
 				       enum map_type type, u64 addr,
 				       symbol_filter_t filter);
 
-static inline struct symbol *
-map_groups__find_function(struct map_groups *self, struct perf_session *session,
-			  u64 addr, symbol_filter_t filter)
+static inline struct symbol *map_groups__find_function(struct map_groups *self,
+						       u64 addr,
+						       symbol_filter_t filter)
 {
-	return map_groups__find_symbol(self, session, MAP__FUNCTION, addr, filter);
+	return map_groups__find_symbol(self, MAP__FUNCTION, addr, filter);
 }
 
 struct map *map_groups__find_by_name(struct map_groups *self,
@@ -76,4 +75,9 @@ struct map *map_groups__find_by_name(struct map_groups *self,
 int __map_groups__create_kernel_maps(struct map_groups *self,
 				     struct map *vmlinux_maps[MAP__NR_TYPES],
 				     struct dso *kernel);
+int map_groups__create_kernel_maps(struct map_groups *self,
+				   struct map *vmlinux_maps[MAP__NR_TYPES]);
+
+struct map *map_groups__new_module(struct map_groups *self, u64 start,
+				   const char *filename);
 #endif	/* __PERF_THREAD_H */

From 6275ce2d5f44ae4f8575c24724525cbb2a3a141b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:01 -0200
Subject: [PATCH 236/640] perf symbols: Fixup vsyscall maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While debugging a problem reported by Pekka Enberg by printing
the IP and all the maps for a thread when we don't find a map
for an IP I noticed that dso__load_sym needs to fixup these
extra maps it creates to hold symbols in different ELF sections
than the main kernel one.

Now we're back showing things like:

[root@doppio linux-2.6-tip]# perf report | grep vsyscall
     0.02%             mutt  [kernel.kallsyms].vsyscall_fn  [.] vread_hpet
     0.01%            named  [kernel.kallsyms].vsyscall_fn  [.] vread_hpet
     0.01%   NetworkManager  [kernel.kallsyms].vsyscall_fn  [.] vread_hpet
     0.01%         gconfd-2  [kernel.kallsyms].vsyscall_0   [.] vgettimeofday
     0.01%  hald-addon-rfki  [kernel.kallsyms].vsyscall_fn  [.] vread_hpet
     0.00%      dbus-daemon  [kernel.kallsyms].vsyscall_fn  [.] vread_hpet
[root@doppio linux-2.6-tip]#

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 613874260761..051d71b33df0 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1011,7 +1011,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 				if (curr_dso == NULL)
 					goto out_elf_end;
 				curr_map = map__new2(start, curr_dso,
-						     MAP__FUNCTION);
+						     map->type);
 				if (curr_map == NULL) {
 					dso__delete(curr_dso);
 					goto out_elf_end;
@@ -1021,6 +1021,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 				curr_dso->origin = DSO__ORIG_KERNEL;
 				map_groups__insert(kmap->kmaps, curr_map);
 				dsos__add(&dsos__kernel, curr_dso);
+				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;
 
@@ -1058,8 +1059,16 @@ new_symbol:
 	/*
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
-	if (nr > 0)
+	if (nr > 0) {
 		symbols__fixup_end(&self->symbols[map->type]);
+		if (kmap) {
+			/*
+			 * We need to fixup this here too because we create new
+			 * maps here, for things like vsyscall sections.
+			 */
+			__map_groups__fixup_end(kmap->kmaps, map->type);
+		}
+	}
 	err = nr;
 out_elf_end:
 	elf_end(elf);

From 8d92c02ab07602786eaa6d4e5b519395730b3fd3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:02 -0200
Subject: [PATCH 237/640] perf symbols: Ditch vdso global variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can check using strcmp, most DSOs don't start with '[' so the
test is cheap enough and we had to test it there anyway since
when reading perf.data files we weren't calling the routine that
created this global variable and thus weren't setting it as
"loaded", which was causing a bogus:

  Failed to open [vdso], continuing without symbols

Message as the first line of 'perf report'.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-3-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/map.c    |  7 ++++++-
 tools/perf/util/symbol.c | 26 ++++----------------------
 tools/perf/util/symbol.h |  6 +++++-
 3 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 36ff0bf0315d..f6626cc3df2e 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -68,8 +68,13 @@ struct map *map__new(struct mmap_event *event, enum map_type type,
 		map__init(self, type, event->start, event->start + event->len,
 			  event->pgoff, dso);
 
-		if (self->dso == vdso || anon)
+		if (anon) {
+set_identity:
 			self->map_ip = self->unmap_ip = identity__map_ip;
+		} else if (strcmp(filename, "[vdso]") == 0) {
+			dso__set_loaded(dso, self->type);
+			goto set_identity;
+		}
 	}
 	return self;
 out_delete:
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 051d71b33df0..e752837363ee 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -53,11 +53,6 @@ bool dso__sorted_by_name(const struct dso *self, enum map_type type)
 	return self->sorted_by_name & (1 << type);
 }
 
-static void dso__set_loaded(struct dso *self, enum map_type type)
-{
-	self->loaded |= (1 << type);
-}
-
 static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
 {
 	self->sorted_by_name |= (1 << type);
@@ -1697,7 +1692,6 @@ out_fixup:
 
 LIST_HEAD(dsos__user);
 LIST_HEAD(dsos__kernel);
-struct dso *vdso;
 
 static void dsos__add(struct list_head *head, struct dso *dso)
 {
@@ -1790,24 +1784,12 @@ static struct dso *dsos__create_kernel(const char *vmlinux)
 {
 	struct dso *kernel = dso__new_kernel(vmlinux);
 
-	if (kernel == NULL)
-		return NULL;
-
-	vdso = dso__new("[vdso]");
-	if (vdso == NULL)
-		goto out_delete_kernel_dso;
-	dso__set_loaded(vdso, MAP__FUNCTION);
-
-	dso__read_running_kernel_build_id(kernel);
-
-	dsos__add(&dsos__kernel, kernel);
-	dsos__add(&dsos__user, vdso);
+	if (kernel != NULL) {
+		dso__read_running_kernel_build_id(kernel);
+		dsos__add(&dsos__kernel, kernel);
+	}
 
 	return kernel;
-
-out_delete_kernel_dso:
-	dso__delete(kernel);
-	return NULL;
 }
 
 int __map_groups__create_kernel_maps(struct map_groups *self,
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e6a59e5c2bea..e90568a9e467 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -121,6 +121,11 @@ void dso__delete(struct dso *self);
 bool dso__loaded(const struct dso *self, enum map_type type);
 bool dso__sorted_by_name(const struct dso *self, enum map_type type);
 
+static inline void dso__set_loaded(struct dso *self, enum map_type type)
+{
+	self->loaded |= (1 << type);
+}
+
 void dso__sort_by_name(struct dso *self, enum map_type type);
 
 extern struct list_head dsos__user, dsos__kernel;
@@ -161,5 +166,4 @@ int kallsyms__parse(const char *filename, void *arg,
 int symbol__init(void);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 
-extern struct dso *vdso;
 #endif /* __PERF_SYMBOL */

From 8ad94c6052649a8e32120b464eefa0ffd8f2f04f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:03 -0200
Subject: [PATCH 238/640] perf probe: Don't use a perf_session instance just to
 resolve symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the recent modifications done to untie the session and
symbol layers, 'perf probe' now can use just the symbols layer.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-probe.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 4fa73eca1d82..ad47bd4c50ef 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -41,7 +41,6 @@
 #include "util/debugfs.h"
 #include "util/symbol.h"
 #include "util/thread.h"
-#include "util/session.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"	/* For debugfs_path */
 #include "util/probe-finder.h"
@@ -59,8 +58,8 @@ static struct {
 	int nr_probe;
 	struct probe_point probes[MAX_PROBES];
 	struct strlist *dellist;
-	struct perf_session *psession;
-	struct map *kmap;
+	struct map_groups kmap_groups;
+	struct map *kmaps[MAP__NR_TYPES];
 	struct line_range line_range;
 } session;
 
@@ -122,7 +121,8 @@ static int opt_del_probe_event(const struct option *opt __used,
 static void evaluate_probe_point(struct probe_point *pp)
 {
 	struct symbol *sym;
-	sym = map__find_symbol_by_name(session.kmap, pp->function, NULL);
+	sym = map__find_symbol_by_name(session.kmaps[MAP__FUNCTION],
+				       pp->function, NULL);
 	if (!sym)
 		die("Kernel symbol \'%s\' not found - probe not added.",
 		    pp->function);
@@ -131,12 +131,13 @@ static void evaluate_probe_point(struct probe_point *pp)
 #ifndef NO_LIBDWARF
 static int open_vmlinux(void)
 {
-	if (map__load(session.kmap, NULL) < 0) {
+	if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) {
 		pr_debug("Failed to load kernel map.\n");
 		return -EINVAL;
 	}
-	pr_debug("Try to open %s\n", session.kmap->dso->long_name);
-	return open(session.kmap->dso->long_name, O_RDONLY);
+	pr_debug("Try to open %s\n",
+		 session.kmaps[MAP__FUNCTION]->dso->long_name);
+	return open(session.kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
 }
 
 static int opt_show_lines(const struct option *opt __used,
@@ -212,12 +213,11 @@ static void init_vmlinux(void)
 		pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
 	if (symbol__init() < 0)
 		die("Failed to init symbol map.");
-	session.psession = perf_session__new(NULL, O_WRONLY, false);
-	if (session.psession == NULL)
-		die("Failed to init perf_session.");
-	session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
-	if (!session.kmap)
-		die("Could not find kernel map.\n");
+
+	map_groups__init(&session.kmap_groups);
+	if (map_groups__create_kernel_maps(&session.kmap_groups,
+					   session.kmaps) < 0)
+		die("Failed to create kernel maps.");
 }
 
 int cmd_probe(int argc, const char **argv, const char *prefix __used)

From 7b2567c1f57c059de29d3f2ca03aca84473865c8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:04 -0200
Subject: [PATCH 239/640] perf build-id: Move the routine to find DSOs with
 hits to the lib
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because 'perf record' will have to find the build-ids in after
we stop recording, so as to reduce even more the impact in the
workload while we do the measurement.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-5-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile               |  2 ++
 tools/perf/builtin-buildid-list.c | 31 ++----------------------
 tools/perf/util/build-id.c        | 39 +++++++++++++++++++++++++++++++
 tools/perf/util/build-id.h        |  8 +++++++
 4 files changed, 51 insertions(+), 29 deletions(-)
 create mode 100644 tools/perf/util/build-id.c
 create mode 100644 tools/perf/util/build-id.h

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 42969303e20b..3a5fb36ccc97 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -357,6 +357,7 @@ LIB_H += util/include/asm/uaccess.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
+LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
 LIB_H += util/event.h
@@ -390,6 +391,7 @@ LIB_H += util/probe-event.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
+LIB_OBJS += util/build-id.o
 LIB_OBJS += util/config.o
 LIB_OBJS += util/ctype.o
 LIB_OBJS += util/debugfs.o
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 431f204bde64..d0675c02f81e 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -8,6 +8,7 @@
  */
 #include "builtin.h"
 #include "perf.h"
+#include "util/build-id.h"
 #include "util/cache.h"
 #include "util/debug.h"
 #include "util/parse-options.h"
@@ -33,34 +34,6 @@ static const struct option options[] = {
 	OPT_END()
 };
 
-static int build_id_list__process_event(event_t *event,
-					struct perf_session *session)
-{
-	struct addr_location al;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = perf_session__findnew(session, event->ip.pid);
-
-	if (thread == NULL) {
-		pr_err("problem processing %d event, skipping it.\n",
-			event->header.type);
-		return -1;
-	}
-
-	thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
-
-	if (al.map != NULL)
-		al.map->dso->hit = 1;
-
-	return 0;
-}
-
-static struct perf_event_ops build_id_list__event_ops = {
-	.sample	= build_id_list__process_event,
-	.mmap	= event__process_mmap,
-	.fork	= event__process_task,
-};
-
 static int __cmd_buildid_list(void)
 {
 	int err = -1;
@@ -71,7 +44,7 @@ static int __cmd_buildid_list(void)
 		return -1;
 
 	if (with_hits)
-		perf_session__process_events(session, &build_id_list__event_ops);
+		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
 
 	dsos__fprintf_buildid(stdout, with_hits);
 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
new file mode 100644
index 000000000000..04904b35ba81
--- /dev/null
+++ b/tools/perf/util/build-id.c
@@ -0,0 +1,39 @@
+/*
+ * build-id.c
+ *
+ * build-id support
+ *
+ * Copyright (C) 2009, 2010 Red Hat Inc.
+ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "build-id.h"
+#include "event.h"
+#include "symbol.h"
+#include <linux/kernel.h>
+
+static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+{
+	struct addr_location al;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread = perf_session__findnew(session, event->ip.pid);
+
+	if (thread == NULL) {
+		pr_err("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
+			      event->ip.ip, &al);
+
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
+	return 0;
+}
+
+struct perf_event_ops build_id__mark_dso_hit_ops = {
+	.sample	= build_id__mark_dso_hit,
+	.mmap	= event__process_mmap,
+	.fork	= event__process_task,
+};
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
new file mode 100644
index 000000000000..1d981d63cf9a
--- /dev/null
+++ b/tools/perf/util/build-id.h
@@ -0,0 +1,8 @@
+#ifndef PERF_BUILD_ID_H_
+#define PERF_BUILD_ID_H_ 1
+
+#include "session.h"
+
+extern struct perf_event_ops build_id__mark_dso_hit_ops;
+
+#endif

From 6122e4e4f5d0913e319ef8a4dc60a47afe4abc0a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:05 -0200
Subject: [PATCH 240/640] perf record: Stop intercepting events, use
 postprocessing to get build-ids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We want to stream events as fast as possible to perf.data, and
also in the future we want to have splice working, when no
interception will be possible.

Using build_id__mark_dso_hit_ops to create the list of DSOs that
back MMAPs we also optimize disk usage in the build-id cache by
only caching DSOs that had hits.

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-6-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 37 +++++++++++----------
 tools/perf/util/header.c    |  7 ++--
 tools/perf/util/session.c   | 64 ++++++++++++++++++++++---------------
 tools/perf/util/session.h   |  3 ++
 tools/perf/util/symbol.c    | 13 +++++---
 tools/perf/util/symbol.h    |  2 +-
 6 files changed, 73 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 949167efa1ed..706f00196b87 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -12,6 +12,7 @@
 
 #include "perf.h"
 
+#include "util/build-id.h"
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
@@ -65,6 +66,7 @@ static int			nr_poll				=      0;
 static int			nr_cpu				=      0;
 
 static int			file_new			=      1;
+static off_t			post_processing_offset;
 
 static struct perf_session	*session;
 
@@ -114,26 +116,10 @@ static void write_output(void *buf, size_t size)
 	}
 }
 
-static void write_event(event_t *buf, size_t size)
-{
-	/*
-	* Add it to the list of DSOs, so that when we finish this
-	 * record session we can pick the available build-ids.
-	 */
-	if (buf->header.type == PERF_RECORD_MMAP) {
-		struct list_head *head = &dsos__user;
-		if (buf->mmap.header.misc == 1)
-			head = &dsos__kernel;
-		__dsos__findnew(head, buf->mmap.filename);
-	}
-
-	write_output(buf, size);
-}
-
 static int process_synthesized_event(event_t *event,
 				     struct perf_session *self __used)
 {
-	write_event(event, event->header.size);
+	write_output(event, event->header.size);
 	return 0;
 }
 
@@ -185,14 +171,14 @@ static void mmap_read(struct mmap_data *md)
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
 
-		write_event(buf, size);
+		write_output(buf, size);
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
 
-	write_event(buf, size);
+	write_output(buf, size);
 
 	md->prev = old;
 	mmap_write_tail(md, old);
@@ -402,10 +388,21 @@ static void open_counters(int cpu, pid_t pid)
 	nr_cpu++;
 }
 
+static int process_buildids(void)
+{
+	u64 size = lseek(output, 0, SEEK_CUR);
+
+	session->fd = output;
+	return __perf_session__process_events(session, post_processing_offset,
+					      size - post_processing_offset,
+					      size, &build_id__mark_dso_hit_ops);
+}
+
 static void atexit_header(void)
 {
 	session->header.data_size += bytes_written;
 
+	process_buildids();
 	perf_header__write(&session->header, output, true);
 }
 
@@ -558,6 +555,8 @@ static int __cmd_record(int argc, const char **argv)
 			return err;
 	}
 
+	post_processing_offset = lseek(output, 0, SEEK_CUR);
+
 	err = event__synthesize_kernel_mmap(process_synthesized_event,
 					    session, "_text");
 	if (err < 0) {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ed3efd728b41..d5facd5ab1f7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -205,8 +205,11 @@ static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
 	dsos__for_each_with_build_id(pos, head) {
 		int err;
 		struct build_id_event b;
-		size_t len = pos->long_name_len + 1;
+		size_t len;
 
+		if (!pos->hit)
+			continue;
+		len = pos->long_name_len + 1;
 		len = ALIGN(len, NAME_ALIGN);
 		memset(&b, 0, sizeof(b));
 		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
@@ -371,7 +374,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 	u64 sec_start;
 	int idx = 0, err;
 
-	if (dsos__read_build_ids())
+	if (dsos__read_build_ids(true))
 		perf_header__set_feat(self, HEADER_BUILD_ID);
 
 	nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index aa8a03120bbd..74cbc64a3a3c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -385,8 +385,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
 	return thread;
 }
 
-int perf_session__process_events(struct perf_session *self,
-				 struct perf_event_ops *ops)
+int __perf_session__process_events(struct perf_session *self,
+				   u64 data_offset, u64 data_size,
+				   u64 file_size, struct perf_event_ops *ops)
 {
 	int err, mmap_prot, mmap_flags;
 	u64 head, shift;
@@ -396,32 +397,11 @@ int perf_session__process_events(struct perf_session *self,
 	uint32_t size;
 	char *buf;
 
-	if (perf_session__register_idle_thread(self) == NULL)
-		return -ENOMEM;
-
 	perf_event_ops__fill_defaults(ops);
 
 	page_size = sysconf(_SC_PAGESIZE);
 
-	head = self->header.data_offset;
-
-	if (!symbol_conf.full_paths) {
-		char bf[PATH_MAX];
-
-		if (getcwd(bf, sizeof(bf)) == NULL) {
-			err = -errno;
-out_getcwd_err:
-			pr_err("failed to get the current directory\n");
-			goto out_err;
-		}
-		self->cwd = strdup(bf);
-		if (self->cwd == NULL) {
-			err = -ENOMEM;
-			goto out_getcwd_err;
-		}
-		self->cwdlen = strlen(self->cwd);
-	}
-
+	head = data_offset;
 	shift = page_size * (head / page_size);
 	offset += shift;
 	head -= shift;
@@ -486,10 +466,10 @@ more:
 
 	head += size;
 
-	if (offset + head >= self->header.data_offset + self->header.data_size)
+	if (offset + head >= data_offset + data_size)
 		goto done;
 
-	if (offset + head < self->size)
+	if (offset + head < file_size)
 		goto more;
 done:
 	err = 0;
@@ -497,6 +477,38 @@ out_err:
 	return err;
 }
 
+int perf_session__process_events(struct perf_session *self,
+				 struct perf_event_ops *ops)
+{
+	int err;
+
+	if (perf_session__register_idle_thread(self) == NULL)
+		return -ENOMEM;
+
+	if (!symbol_conf.full_paths) {
+		char bf[PATH_MAX];
+
+		if (getcwd(bf, sizeof(bf)) == NULL) {
+			err = -errno;
+out_getcwd_err:
+			pr_err("failed to get the current directory\n");
+			goto out_err;
+		}
+		self->cwd = strdup(bf);
+		if (self->cwd == NULL) {
+			err = -ENOMEM;
+			goto out_getcwd_err;
+		}
+		self->cwdlen = strlen(self->cwd);
+	}
+
+	err = __perf_session__process_events(self, self->header.data_offset,
+					     self->header.data_size,
+					     self->size, ops);
+out_err:
+	return err;
+}
+
 bool perf_session__has_traces(struct perf_session *self, const char *msg)
 {
 	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 752d75aebade..31950fcd8a4d 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -50,6 +50,9 @@ void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
 
+int __perf_session__process_events(struct perf_session *self,
+				   u64 data_offset, u64 data_size, u64 size,
+				   struct perf_event_ops *ops);
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_event_ops *event_ops);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e752837363ee..bfb055459670 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1076,25 +1076,28 @@ static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
 	return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
 }
 
-static bool __dsos__read_build_ids(struct list_head *head)
+static bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 {
 	bool have_build_id = false;
 	struct dso *pos;
 
-	list_for_each_entry(pos, head, node)
+	list_for_each_entry(pos, head, node) {
+		if (with_hits && !pos->hit)
+			continue;
 		if (filename__read_build_id(pos->long_name, pos->build_id,
 					    sizeof(pos->build_id)) > 0) {
 			have_build_id	  = true;
 			pos->has_build_id = true;
 		}
+	}
 
 	return have_build_id;
 }
 
-bool dsos__read_build_ids(void)
+bool dsos__read_build_ids(bool with_hits)
 {
-	bool kbuildids = __dsos__read_build_ids(&dsos__kernel),
-	     ubuildids = __dsos__read_build_ids(&dsos__user);
+	bool kbuildids = __dsos__read_build_ids(&dsos__kernel, with_hits),
+	     ubuildids = __dsos__read_build_ids(&dsos__user, with_hits);
 	return kbuildids || ubuildids;
 }
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e90568a9e467..1b4192ee5300 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -157,7 +157,7 @@ struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
-bool dsos__read_build_ids(void);
+bool dsos__read_build_ids(bool with_hits);
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,

From 29a9f66d703cb9464e24084e09edab5683e1b6b8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 3 Feb 2010 16:52:06 -0200
Subject: [PATCH 241/640] perf tools: Adjust some verbosity levels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not to pollute too much 'perf annotate' debugging sessions.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-7-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c          | 33 ++++++++++----------------
 tools/perf/util/include/linux/kernel.h |  1 +
 tools/perf/util/symbol.c               |  9 +++----
 3 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 73c202ee0882..4fc3899bf83a 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -97,9 +97,7 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 	sym_size = sym->end - sym->start;
 	offset = ip - sym->start;
 
-	if (verbose)
-		fprintf(stderr, "%s: ip=%Lx\n", __func__,
-			he->map->unmap_ip(he->map, ip));
+	pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip));
 
 	if (offset >= sym_size)
 		return;
@@ -108,12 +106,8 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 	h->sum++;
 	h->ip[offset]++;
 
-	if (verbose >= 3)
-		printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
-			(void *)(unsigned long)he->sym->start,
-			he->sym->name,
-			(void *)(unsigned long)ip, ip - he->sym->start,
-			h->ip[offset]);
+	pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start,
+		  he->sym->name, ip, ip - he->sym->start, h->ip[offset]);
 }
 
 static int perf_session__add_hist_entry(struct perf_session *self,
@@ -136,14 +130,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 		    event->ip.pid, event->ip.ip);
 
 	if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) {
-		fprintf(stderr, "problem processing %d event, skipping it.\n",
-			event->header.type);
+		pr_warning("problem processing %d event, skipping it.\n",
+			   event->header.type);
 		return -1;
 	}
 
 	if (!al.filtered && perf_session__add_hist_entry(session, &al, 1)) {
-		fprintf(stderr, "problem incrementing symbol count, "
-				"skipping event\n");
+		pr_warning("problem incrementing symbol count, "
+			   "skipping event\n");
 		return -1;
 	}
 
@@ -378,11 +372,9 @@ static void annotate_sym(struct hist_entry *he)
 	if (!filename)
 		return;
 
-	if (verbose)
-		fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n",
-			__func__, filename, sym->name,
-			map->unmap_ip(map, sym->start),
-			map->unmap_ip(map, sym->end));
+	pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__,
+		 filename, sym->name, map->unmap_ip(map, sym->start),
+		 map->unmap_ip(map, sym->end));
 
 	if (full_paths)
 		d_filename = filename;
@@ -542,9 +534,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 	setup_pager();
 
 	if (field_sep && *field_sep == '.') {
-		fputs("'.' is the only non valid --field-separator argument\n",
-				stderr);
-		exit(129);
+		pr_err("'.' is the only non valid --field-separator argument\n");
+		return -1;
 	}
 
 	return __cmd_annotate();
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index 21c0274c02fa..f2611655ab51 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -101,5 +101,6 @@ simple_strtoul(const char *nptr, char **endptr, int base)
 	eprintf(n, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
 
 #endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index bfb055459670..a60ba2ba1044 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -137,7 +137,7 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name)
 	self->start = start;
 	self->end   = len ? start + len - 1 : start;
 
-	pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
+	pr_debug4("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
 
 	memcpy(self->name, name, namelen);
 
@@ -1024,9 +1024,10 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		}
 
 		if (curr_dso->adjust_symbols) {
-			pr_debug2("adjusting symbol: st_value: %Lx sh_addr: "
-				  "%Lx sh_offset: %Lx\n", (u64)sym.st_value,
-				  (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+			pr_debug4("%s: adjusting symbol: st_value: %#Lx "
+				  "sh_addr: %#Lx sh_offset: %#Lx\n", __func__,
+				  (u64)sym.st_value, (u64)shdr.sh_addr,
+				  (u64)shdr.sh_offset);
 			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 		}
 		/*

From 7a2b6209863626cf8362e5ff4653491558f91e67 Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Date: Wed, 3 Feb 2010 16:52:07 -0200
Subject: [PATCH 242/640] perf annotate: Fix it for non-prelinked *.so

The problem was we were incorrectly calculating objdump
addresses for sym->start and sym->end, look:

For simple ET_DYN type DSO (*.so) with one function, objdump -dS
output is something like this:

    000004ac <my_strlen>:
    int my_strlen(const char *s)
     4ac:   55                      push   %ebp
     4ad:   89 e5                   mov    %esp,%ebp
     4af:   83 ec 10                sub    $0x10,%esp
    {

i.e. we have relative-to-dso-mapping IPs (=RIP) there.

For ET_EXEC type and probably for prelinked libs as well (sorry
can't test - I don't use prelink) objdump outputs absolute IPs,
e.g.

    08048604 <zz_strlen>:
    extern "C"
    int zz_strlen(const char *s)
     8048604:       55                      push   %ebp
     8048605:       89 e5                   mov    %esp,%ebp
     8048607:       83 ec 10                sub    $0x10,%esp
    {

So, if sym->start is always relative to dso mapping(*), we'll
have to unmap it for ET_EXEC like cases, and leave as is for
ET_DYN cases.

(*) and it is - we've explicitely made it relative. Look for
    adjust_symbols handling in dso__load_sym()

Previously we were always unmapping sym->start and for ET_DYN
dsos resulting addresses were wrong, and so objdump output was
empty.

The end result was that perf annotate output for symbols from
non-prelinked *.so had always 0.00% percents only, which is
wrong.

To fix it, let's introduce a helper for converting rip to
objdump address, and also let's document what map_ip() and
unmap_ip() do -- I had to study sources for several hours to
understand it.

Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <1265223128-11786-8-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c |  5 +++--
 tools/perf/util/map.c         | 12 ++++++++++++
 tools/perf/util/map.h         |  9 +++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4fc3899bf83a..28ea4e0c3658 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -189,7 +189,7 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 			line_ip = -1;
 	}
 
-	start = he->map->unmap_ip(he->map, sym->start);
+	start = map__rip_2objdump(he->map, sym->start);
 
 	if (line_ip != -1) {
 		const char *path = NULL;
@@ -397,7 +397,8 @@ static void annotate_sym(struct hist_entry *he)
 		       dso, dso->long_name, sym, sym->name);
 
 	sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
-		map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
+		map__rip_2objdump(map, sym->start),
+		map__rip_2objdump(map, sym->end),
 		filename, filename);
 
 	if (verbose >= 3)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index f6626cc3df2e..af5805f51314 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -210,3 +210,15 @@ size_t map__fprintf(struct map *self, FILE *fp)
 	return fprintf(fp, " %Lx-%Lx %Lx %s\n",
 		       self->start, self->end, self->pgoff, self->dso->name);
 }
+
+/*
+ * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
+ */
+u64 map__rip_2objdump(struct map *map, u64 rip)
+{
+	u64 addr = map->dso->adjust_symbols ?
+			map->unmap_ip(map, rip) :	/* RIP -> IP */
+			rip;
+	return addr;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index de048399d776..9cee9c788dbf 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -26,8 +26,12 @@ struct map {
 	u64			end;
 	enum map_type		type;
 	u64			pgoff;
+
+	/* ip -> dso rip */
 	u64			(*map_ip)(struct map *, u64);
+	/* dso rip -> ip */
 	u64			(*unmap_ip)(struct map *, u64);
+
 	struct dso		*dso;
 };
 
@@ -56,6 +60,11 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip)
 	return ip;
 }
 
+
+/* rip -> addr suitable for passing to `objdump --start-address=` */
+u64 map__rip_2objdump(struct map *map, u64 rip);
+
+
 struct symbol;
 struct mmap_event;
 

From 6cff0e8dbaa4d5d822a814e5028683d7e71c3291 Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Date: Wed, 3 Feb 2010 16:52:08 -0200
Subject: [PATCH 243/640] perf top: Teach it to autolocate vmlinux

By relying on logic in dso__load_kernel_sym(), we can
automatically load vmlinux.

The only thing which needs to be adjusted, is how --sym-annotate
option is handled - now we can't rely on vmlinux been loaded
until full successful pass of dso__load_vmlinux(), but that's
not the case if we'll do sym_filter_entry setup in
symbol_filter().

So move this step right after event__process_sample() where we
know the whole dso__load_kernel_sym() pass is done.

By the way, though conceptually similar `perf top` still can't
annotate userspace - see next patches with fixes.

Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <1265223128-11786-9-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-top.txt |  2 +-
 tools/perf/builtin-top.c              | 39 ++++++++++++++++-----------
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 4a7d558dc309..785b9fc32a46 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -74,7 +74,7 @@ OPTIONS
 
 -s <symbol>::
 --sym-annotate=<symbol>::
-        Annotate this symbol.  Requires -k option.
+        Annotate this symbol.
 
 -v::
 --verbose::
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1fc018e048e1..83c09c8f28ed 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -94,6 +94,7 @@ struct source_line {
 
 static char			*sym_filter			=   NULL;
 struct sym_entry		*sym_filter_entry		=   NULL;
+struct sym_entry		*sym_filter_entry_sched		=   NULL;
 static int			sym_pcnt_filter			=      5;
 static int			sym_counter			=      0;
 static int			display_weighted		=     -1;
@@ -695,11 +696,9 @@ static void print_mapped_keys(void)
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
 
-	if (symbol_conf.vmlinux_name) {
-		fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
-		fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
-		fprintf(stdout, "\t[S]     stop annotation.\n");
-	}
+	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
+	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
+	fprintf(stdout, "\t[S]     stop annotation.\n");
 
 	if (nr_counters > 1)
 		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
@@ -725,14 +724,13 @@ static int key_mapped(int c)
 		case 'Q':
 		case 'K':
 		case 'U':
+		case 'F':
+		case 's':
+		case 'S':
 			return 1;
 		case 'E':
 		case 'w':
 			return nr_counters > 1 ? 1 : 0;
-		case 'F':
-		case 's':
-		case 'S':
-			return symbol_conf.vmlinux_name ? 1 : 0;
 		default:
 			break;
 	}
@@ -910,8 +908,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 	syme = symbol__priv(sym);
 	syme->map = map;
 	syme->src = NULL;
-	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
-		sym_filter_entry = syme;
+
+	if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
+		/* schedule initial sym_filter_entry setup */
+		sym_filter_entry_sched = syme;
+		sym_filter = NULL;
+	}
 
 	for (i = 0; skip_symbols[i]; i++) {
 		if (!strcmp(skip_symbols[i], name)) {
@@ -976,6 +978,13 @@ static void event__process_sample(const event_t *self,
 		return;
 	}
 
+	/* let's see, whether we need to install initial sym_filter_entry */
+	if (sym_filter_entry_sched) {
+		sym_filter_entry = sym_filter_entry_sched;
+		sym_filter_entry_sched = NULL;
+		parse_source(sym_filter_entry);
+	}
+
 	syme = symbol__priv(al.sym);
 	if (!syme->skip) {
 		syme->count[counter]++;
@@ -1270,7 +1279,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
 	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
-		    "symbol to annotate - requires -k option"),
+		    "symbol to annotate"),
 	OPT_BOOLEAN('z', "zero", &zero,
 		    "zero history across updates"),
 	OPT_INTEGER('F', "freq", &freq,
@@ -1306,16 +1315,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 
 	symbol_conf.priv_size = (sizeof(struct sym_entry) +
 				 (nr_counters + 1) * sizeof(unsigned long));
-	if (symbol_conf.vmlinux_name == NULL)
-		symbol_conf.try_vmlinux_path = true;
+
+	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 	if (symbol__init() < 0)
 		return -1;
 
 	if (delay_secs < 1)
 		delay_secs = 1;
 
-	parse_source(sym_filter_entry);
-
 	/*
 	 * User specified count overrides default frequency.
 	 */

From 57d818895f9d294ab9080e5a662675fdee943ff1 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 4 Feb 2010 07:31:46 +0100
Subject: [PATCH 244/640] perf annotate: Fix perf top module symbol annotation

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1265265106.6364.5.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 83c09c8f28ed..e4156bc4566d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -204,8 +204,8 @@ static void parse_source(struct sym_entry *syme)
 	sprintf(command,
 		"objdump --start-address=0x%016Lx "
 			 "--stop-address=0x%016Lx -dS %s",
-		map->unmap_ip(map, sym->start),
-		map->unmap_ip(map, sym->end), path);
+		map__rip_2objdump(map, sym->start),
+		map__rip_2objdump(map, sym->end), path);
 
 	file = popen(command, "r");
 	if (!file)

From 615d0ebbc782b67296e3226c293f520f93f93515 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:04 -0500
Subject: [PATCH 245/640] kprobes: Disable booster when CONFIG_PREEMPT=y

Disable kprobe booster when CONFIG_PREEMPT=y at this time,
because it can't ensure that all kernel threads preempted on
kprobe's boosted slot run out from the slot even using
freeze_processes().

The booster on preemptive kernel will be resumed if
synchronize_tasks() or something like that is introduced.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100202214904.4694.24330.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/kernel/kprobes.c |  2 +-
 arch/x86/kernel/kprobes.c  |  2 +-
 kernel/kprobes.c           | 29 ++---------------------------
 3 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 9adac441ac9b..7026b29e277a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		return 1;
 
 ss_probe:
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
+#if !defined(CONFIG_PREEMPT)
 	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		ia64_psr(regs)->ri = p->ainsn.slot;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5b8c7505b3bc..9453815138fa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -429,7 +429,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
 				       struct kprobe_ctlblk *kcb)
 {
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
+#if !defined(CONFIG_PREEMPT)
 	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b7df302a0204..9907a03c29f6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -124,30 +124,6 @@ static LIST_HEAD(kprobe_insn_pages);
 static int kprobe_garbage_slots;
 static int collect_garbage_slots(void);
 
-static int __kprobes check_safety(void)
-{
-	int ret = 0;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
-	ret = freeze_processes();
-	if (ret == 0) {
-		struct task_struct *p, *q;
-		do_each_thread(p, q) {
-			if (p != current && p->state == TASK_RUNNING &&
-			    p->pid != 0) {
-				printk("Check failed: %s is running\n",p->comm);
-				ret = -1;
-				goto loop_end;
-			}
-		} while_each_thread(p, q);
-	}
-loop_end:
-	thaw_processes();
-#else
-	synchronize_sched();
-#endif
-	return ret;
-}
-
 /**
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
@@ -235,9 +211,8 @@ static int __kprobes collect_garbage_slots(void)
 {
 	struct kprobe_insn_page *kip, *next;
 
-	/* Ensure no-one is preepmted on the garbages */
-	if (check_safety())
-		return -EAGAIN;
+	/* Ensure no-one is interrupted on the garbages */
+	synchronize_sched();
 
 	list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
 		int i;

From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:11 -0500
Subject: [PATCH 246/640] ftrace/alternatives: Introducing *_text_reserved
 functions

Introducing *_text_reserved functions for checking the text
address range is partially reserved or not. This patch provides
checking routines for x86 smp alternatives and dynamic ftrace.
Since both functions modify fixed pieces of kernel text, they
should reserve and protect those from other dynamic text
modifier, like kprobes.

This will also be extended when introducing other subsystems
which modify fixed pieces of kernel text. Dynamic text modifiers
should avoid those.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: przemyslaw@pawelczyk.it
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/alternative.h |  5 +++++
 arch/x86/kernel/alternative.c      | 16 ++++++++++++++++
 include/linux/ftrace.h             |  6 ++++++
 kernel/trace/ftrace.c              | 15 +++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..ac80b7d70014 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..3c13284ff86d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp)
 	mutex_unlock(&smp_alt);
 }
 
+/* Return 1 if the address range is reserved for smp-alternatives */
+int alternatives_text_reserved(void *start, void *end)
+{
+	struct smp_alt_module *mod;
+	u8 **ptr;
+
+	list_for_each_entry(mod, &smp_alt_modules, next) {
+		if (mod->text > end || mod->text_end < start)
+			continue;
+		for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
+			if (start <= *ptr && end >= *ptr)
+				return 1;
+	}
+
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b4f97d24d7f..9d127efed43c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -134,6 +134,8 @@ extern void
 unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
 extern void unregister_ftrace_function_probe_all(char *glob);
 
+extern int ftrace_text_reserved(void *start, void *end);
+
 enum {
 	FTRACE_FL_FREE		= (1 << 0),
 	FTRACE_FL_FAILED	= (1 << 1),
@@ -250,6 +252,10 @@ static inline int unregister_ftrace_command(char *cmd_name)
 {
 	return -EINVAL;
 }
+static inline int ftrace_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..3d90661a5f40 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1025,6 +1025,21 @@ static void ftrace_bug(int failed, unsigned long ip)
 }
 
 
+/* Return 1 if the address range is reserved for ftrace */
+int ftrace_text_reserved(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		if (rec->ip <= (unsigned long)end &&
+		    rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
+			return 1;
+	} while_for_each_ftrace_rec();
+	return 0;
+}
+
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {

From 4554dbcb85a4ed2abaa2b6fa15649b796699ec89 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:18 -0500
Subject: [PATCH 247/640] kprobes: Check probe address is reserved

Check whether the address of new probe is already reserved by
ftrace or alternatives (on x86) when registering new probe.
If reserved, it returns an error and not register the probe.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: przemyslaw@pawelczyk.it
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <20100202214918.4694.94179.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 3 +++
 kernel/kprobes.c          | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 9453815138fa..5de9f4a9c3fd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
+	if (alternatives_text_reserved(p->addr, p->addr))
+		return -EINVAL;
+
 	if (!can_probe((unsigned long)p->addr))
 		return -EILSEQ;
 	/* insn: must be on special executable page on x86. */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9907a03c29f6..c3340e836c37 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/kdebug.h>
 #include <linux/memory.h>
+#include <linux/ftrace.h>
 
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
@@ -703,7 +704,8 @@ int __kprobes register_kprobe(struct kprobe *p)
 
 	preempt_disable();
 	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr)) {
+	    in_kprobes_functions((unsigned long) p->addr) ||
+	    ftrace_text_reserved(p->addr, p->addr)) {
 		preempt_enable();
 		return -EINVAL;
 	}

From f24bb999d2b9f2950e5cac5b69bffedf73c24ea4 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:25 -0500
Subject: [PATCH 248/640] ftrace: Remove record freezing

Remove record freezing. Because kprobes never puts probe on
ftrace's mcount call anymore, it doesn't need ftrace to check
whether kprobes on it.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: przemyslaw@pawelczyk.it
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100202214925.4694.73469.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  1 -
 kernel/trace/ftrace.c  | 39 ---------------------------------------
 2 files changed, 40 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9d127efed43c..eb054ae95605 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -143,7 +143,6 @@ enum {
 	FTRACE_FL_ENABLED	= (1 << 3),
 	FTRACE_FL_NOTRACE	= (1 << 4),
 	FTRACE_FL_CONVERTED	= (1 << 5),
-	FTRACE_FL_FROZEN	= (1 << 6),
 };
 
 struct dyn_ftrace {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3d90661a5f40..1904797f4a8a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
-#ifdef CONFIG_KPROBES
-
-static int frozen_record_count;
-
-static inline void freeze_record(struct dyn_ftrace *rec)
-{
-	if (!(rec->flags & FTRACE_FL_FROZEN)) {
-		rec->flags |= FTRACE_FL_FROZEN;
-		frozen_record_count++;
-	}
-}
-
-static inline void unfreeze_record(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_FROZEN) {
-		rec->flags &= ~FTRACE_FL_FROZEN;
-		frozen_record_count--;
-	}
-}
-
-static inline int record_frozen(struct dyn_ftrace *rec)
-{
-	return rec->flags & FTRACE_FL_FROZEN;
-}
-#else
-# define freeze_record(rec)			({ 0; })
-# define unfreeze_record(rec)			({ 0; })
-# define record_frozen(rec)			({ 0; })
-#endif /* CONFIG_KPROBES */
-
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
 	rec->freelist = ftrace_free_records;
@@ -1091,14 +1060,6 @@ static void ftrace_replace_code(int enable)
 		    !(rec->flags & FTRACE_FL_CONVERTED))
 			continue;
 
-		/* ignore updates to this record's mcount site */
-		if (get_kprobe((void *)rec->ip)) {
-			freeze_record(rec);
-			continue;
-		} else {
-			unfreeze_record(rec);
-		}
-
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;

From 9717e6cd3db22eade7dbae0fc9235c66325a7132 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 28 Jan 2010 13:57:44 +0100
Subject: [PATCH 249/640] perf_events: Optimize perf_event_task_tick()

Pretty much all of the calls do perf_disable/perf_enable cycles, pull
that out to cut back on hardware programming.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40f8b07c5601..087025fe3ba1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1573,12 +1573,8 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	raw_spin_lock(&ctx->lock);
 
 	/* Rotate the first entry last of non-pinned groups */
-	perf_disable();
-
 	list_rotate_left(&ctx->flexible_groups);
 
-	perf_enable();
-
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -1593,6 +1589,8 @@ void perf_event_task_tick(struct task_struct *curr)
 	cpuctx = &__get_cpu_var(perf_cpu_context);
 	ctx = curr->perf_event_ctxp;
 
+	perf_disable();
+
 	perf_ctx_adjust_freq(&cpuctx->ctx);
 	if (ctx)
 		perf_ctx_adjust_freq(ctx);
@@ -1608,6 +1606,8 @@ void perf_event_task_tick(struct task_struct *curr)
 	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
 		task_ctx_sched_in(curr, EVENT_FLEXIBLE);
+
+	perf_enable();
 }
 
 static int event_enable_on_exec(struct perf_event *event,

From 8c48e444191de0ff84e85d41180d7bc3e74f14ef Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 Jan 2010 13:25:31 +0100
Subject: [PATCH 250/640] perf_events, x86: Implement intel core solo/duo
 support

Implement Intel Core Solo/Duo, aka.
Intel Architectural Performance Monitoring Version 1.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 133 ++++++++++++++-----------------
 1 file changed, 61 insertions(+), 72 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1846ead0576b..5b91992b6b25 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -227,6 +227,17 @@ static const u64 intel_perfmon_event_map[] =
 };
 
 static struct event_constraint intel_core_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
 	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
@@ -1216,7 +1227,7 @@ static void intel_pmu_disable_all(void)
 		intel_pmu_disable_bts();
 }
 
-static void amd_pmu_disable_all(void)
+static void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
@@ -1226,11 +1237,11 @@ static void amd_pmu_disable_all(void)
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		rdmsrl(x86_pmu.eventsel + idx, val);
 		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
 			continue;
 		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
 
@@ -1278,7 +1289,7 @@ static void intel_pmu_enable_all(void)
 	}
 }
 
-static void amd_pmu_enable_all(void)
+static void x86_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
@@ -1292,7 +1303,7 @@ static void amd_pmu_enable_all(void)
 
 		val = event->hw.config;
 		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
+		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
 
@@ -1546,7 +1557,7 @@ static inline void intel_pmu_ack_status(u64 ack)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	(void)checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
@@ -1598,12 +1609,6 @@ intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	x86_pmu_disable_event(hwc, idx);
 }
 
-static inline void
-amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	x86_pmu_disable_event(hwc, idx);
-}
-
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
@@ -1723,15 +1728,14 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	x86_pmu_enable_event(hwc, idx);
+	__x86_pmu_enable_event(hwc, idx);
 }
 
-static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
 	if (cpuc->enabled)
-		x86_pmu_enable_event(hwc, idx);
+		__x86_pmu_enable_event(hwc, idx);
 }
 
 /*
@@ -1988,50 +1992,6 @@ static void intel_pmu_reset(void)
 	local_irq_restore(flags);
 }
 
-static int p6_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	struct perf_event *event;
-	struct hw_perf_event *hwc;
-	int idx, handled = 0;
-	u64 val;
-
-	data.addr = 0;
-	data.raw = NULL;
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
-		event = cpuc->events[idx];
-		hwc = &event->hw;
-
-		val = x86_perf_event_update(event, hwc, idx);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
-			continue;
-
-		/*
-		 * event overflow
-		 */
-		handled		= 1;
-		data.period	= event->hw.last_period;
-
-		if (!x86_perf_event_set_period(event, hwc, idx))
-			continue;
-
-		if (perf_event_overflow(event, 1, &data, regs))
-			p6_pmu_disable_event(hwc, idx);
-	}
-
-	if (handled)
-		inc_irq_stat(apic_perf_irqs);
-
-	return handled;
-}
-
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -2098,7 +2058,7 @@ again:
 	return 1;
 }
 
-static int amd_pmu_handle_irq(struct pt_regs *regs)
+static int x86_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
@@ -2133,7 +2093,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 			continue;
 
 		if (perf_event_overflow(event, 1, &data, regs))
-			amd_pmu_disable_event(hwc, idx);
+			x86_pmu.disable(hwc, idx);
 	}
 
 	if (handled)
@@ -2374,7 +2334,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 
 static __initconst struct x86_pmu p6_pmu = {
 	.name			= "p6",
-	.handle_irq		= p6_pmu_handle_irq,
+	.handle_irq		= x86_pmu_handle_irq,
 	.disable_all		= p6_pmu_disable_all,
 	.enable_all		= p6_pmu_enable_all,
 	.enable			= p6_pmu_enable_event,
@@ -2401,6 +2361,29 @@ static __initconst struct x86_pmu p6_pmu = {
 	.event_constraints	= intel_p6_event_constraints
 };
 
+static __initconst struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+};
+
 static __initconst struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -2427,11 +2410,11 @@ static __initconst struct x86_pmu intel_pmu = {
 
 static __initconst struct x86_pmu amd_pmu = {
 	.name			= "AMD",
-	.handle_irq		= amd_pmu_handle_irq,
-	.disable_all		= amd_pmu_disable_all,
-	.enable_all		= amd_pmu_enable_all,
-	.enable			= amd_pmu_enable_event,
-	.disable		= amd_pmu_disable_event,
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
@@ -2498,9 +2481,10 @@ static __init int intel_pmu_init(void)
 
 	version = eax.split.version_id;
 	if (version < 2)
-		return -ENODEV;
+		x86_pmu = core_pmu;
+	else
+		x86_pmu = intel_pmu;
 
-	x86_pmu				= intel_pmu;
 	x86_pmu.version			= version;
 	x86_pmu.num_events		= eax.split.num_events;
 	x86_pmu.event_bits		= eax.split.bit_width;
@@ -2510,12 +2494,17 @@ static __init int intel_pmu_init(void)
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
 	 */
-	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);
+	if (version > 1)
+		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
 
 	/*
 	 * Install the hw-cache-events table:
 	 */
 	switch (boot_cpu_data.x86_model) {
+	case 14: /* 65 nm core solo/duo, "Yonah" */
+		pr_cont("Core events, ");
+		break;
+
 	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
 	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
 	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
@@ -2523,7 +2512,7 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		x86_pmu.event_constraints = intel_core_event_constraints;
+		x86_pmu.event_constraints = intel_core2_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
 

From fce877e3a429940a986e085a41e8b57f2d922e36 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 Jan 2010 13:25:12 +0100
Subject: [PATCH 251/640] bitops: Ensure the compile time HWEIGHT is only used
 for such

Avoid accidental misuse by failing to compile things

Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 10 +++++++---
 include/linux/bitops.h           | 33 +++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5b91992b6b25..96cfc1a4fe9f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,13 +93,16 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, n, m) {	\
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
 	{ .idxmsk64[0] = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
-	.weight = HWEIGHT64((u64)(n)),	\
+	.weight = (w),			\
 }
 
+#define EVENT_CONSTRAINT(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
@@ -2622,7 +2625,8 @@ void __init init_hw_perf_events(void)
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
+				   0, x86_pmu.num_events);
 
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index ba0fd1eb4af7..25b8b2f33ae9 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -45,19 +45,30 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
-#define HWEIGHT8(w)			\
-      (	(!!((w) & (1ULL << 0))) +	\
-	(!!((w) & (1ULL << 1))) +	\
-	(!!((w) & (1ULL << 2))) +	\
-	(!!((w) & (1ULL << 3))) +	\
-	(!!((w) & (1ULL << 4))) +	\
-	(!!((w) & (1ULL << 5))) +	\
-	(!!((w) & (1ULL << 6))) +	\
+/*
+ * Clearly slow versions of the hweightN() functions, their benefit is
+ * of course compile time evaluation of constant arguments.
+ */
+#define HWEIGHT8(w)					\
+      (	BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) +	\
+	(!!((w) & (1ULL << 0))) +			\
+	(!!((w) & (1ULL << 1))) +			\
+	(!!((w) & (1ULL << 2))) +			\
+	(!!((w) & (1ULL << 3))) +			\
+	(!!((w) & (1ULL << 4))) +			\
+	(!!((w) & (1ULL << 5))) +			\
+	(!!((w) & (1ULL << 6))) +			\
 	(!!((w) & (1ULL << 7)))	)
 
-#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8(w >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8((w) >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
+
+/*
+ * Type invariant version that simply casts things to the
+ * largest type.
+ */
+#define HWEIGHT(w)   HWEIGHT64((u64)(w))
 
 /**
  * rol32 - rotate a 32-bit value left

From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 1 Feb 2010 14:50:01 +0200
Subject: [PATCH 252/640] perf_events, x86: Fix bug in hw_perf_enable()

We cannot assume that because hwc->idx == assign[i], we can avoid
reprogramming the counter in hw_perf_enable().

The event may have been scheduled out and another event may have been
programmed into this counter. Thus, we need a more robust way of
verifying if the counter still contains config/data related to an event.

This patch adds a generation number to each counter on each cpu. Using
this mechanism we can verify reliabilty whether the content of a counter
corresponds to an event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++------
 include/linux/perf_event.h       |  2 ++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 96cfc1a4fe9f..a920f173a220 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -90,6 +90,7 @@ struct cpu_hw_events {
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
@@ -1142,6 +1143,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	hwc->idx = -1;
+	hwc->last_cpu = -1;
+	hwc->last_tag = ~0ULL;
 
 	/*
 	 * Count user and OS events unless requested not to.
@@ -1457,11 +1460,14 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	return n;
 }
 
-
 static inline void x86_assign_hw_event(struct perf_event *event,
-				struct hw_perf_event *hwc, int idx)
+				struct cpu_hw_events *cpuc, int i)
 {
-	hwc->idx = idx;
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->idx = cpuc->assign[i];
+	hwc->last_cpu = smp_processor_id();
+	hwc->last_tag = ++cpuc->tags[i];
 
 	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
 		hwc->config_base = 0;
@@ -1480,6 +1486,15 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	}
 }
 
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+					struct cpu_hw_events *cpuc,
+					int i)
+{
+	return hwc->idx == cpuc->assign[i] &&
+		hwc->last_cpu == smp_processor_id() &&
+		hwc->last_tag == cpuc->tags[i];
+}
+
 static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
 
 void hw_perf_enable(void)
@@ -1508,7 +1523,14 @@ void hw_perf_enable(void)
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
-			if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+			/*
+			 * we can avoid reprogramming counter if:
+			 * - assigned same counter as last time
+			 * - running on same CPU as last time
+			 * - no other event has used the counter since
+			 */
+			if (hwc->idx == -1 ||
+			    match_prev_assignment(hwc, cpuc, i))
 				continue;
 
 			__x86_pmu_disable(event, cpuc);
@@ -1522,12 +1544,12 @@ void hw_perf_enable(void)
 			hwc = &event->hw;
 
 			if (hwc->idx == -1) {
-				x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+				x86_assign_hw_event(event, cpuc, i);
 				x86_perf_event_set_period(event, hwc, hwc->idx);
 			}
 			/*
 			 * need to mark as active because x86_pmu_disable()
-			 * clear active_mask and eventsp[] yet it preserves
+			 * clear active_mask and events[] yet it preserves
 			 * idx
 			 */
 			set_bit(hwc->idx, cpuc->active_mask);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 556b0f4a668e..071a7db52549 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -478,9 +478,11 @@ struct hw_perf_event {
 	union {
 		struct { /* hardware */
 			u64		config;
+			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
 			int		idx;
+			int		last_cpu;
 		};
 		struct { /* software */
 			s64		remaining;

From f887f3019e56389a73617f4e70f512e82cc89adb Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Thu, 4 Feb 2010 16:46:42 +0800
Subject: [PATCH 253/640] perf tools: Clean up O_LARGEFILE et al usage

Setting _FILE_OFFSET_BITS and using O_LARGEFILE, lseek64, etc,
is redundant. Thanks H. Peter Anvin for pointing it out.

So, this patch removes O_LARGEFILE, lseek64, etc.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B6A8972.3070605@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c        |  3 +--
 tools/perf/util/header.c           | 21 ++++++++++-----------
 tools/perf/util/session.c          |  3 +--
 tools/perf/util/trace-event-read.c | 20 ++++++++++----------
 4 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 706f00196b87..3ad599b12c91 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -5,7 +5,6 @@
  * (or a CPU, or a PID) into the perf.data output file - for
  * later analysis via perf report.
  */
-#define _LARGEFILE64_SOURCE
 #define _FILE_OFFSET_BITS 64
 
 #include "builtin.h"
@@ -451,7 +450,7 @@ static int __cmd_record(int argc, const char **argv)
 		append_file = 0;
 	}
 
-	flags = O_CREAT|O_RDWR|O_LARGEFILE;
+	flags = O_CREAT|O_RDWR;
 	if (append_file)
 		file_new = 0;
 	else
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d5facd5ab1f7..6c9aa16ee51f 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,4 +1,3 @@
-#define _LARGEFILE64_SOURCE
 #define _FILE_OFFSET_BITS 64
 
 #include <sys/types.h>
@@ -388,7 +387,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
 	sec_start = self->data_offset + self->data_size;
-	lseek64(fd, sec_start + sec_size, SEEK_SET);
+	lseek(fd, sec_start + sec_size, SEEK_SET);
 
 	if (perf_header__has_feat(self, HEADER_TRACE_INFO)) {
 		struct perf_file_section *trace_sec;
@@ -396,9 +395,9 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		trace_sec = &feat_sec[idx++];
 
 		/* Write trace info */
-		trace_sec->offset = lseek64(fd, 0, SEEK_CUR);
+		trace_sec->offset = lseek(fd, 0, SEEK_CUR);
 		read_tracing_data(fd, attrs, nr_counters);
-		trace_sec->size = lseek64(fd, 0, SEEK_CUR) - trace_sec->offset;
+		trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
 	}
 
 
@@ -408,18 +407,18 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		buildid_sec = &feat_sec[idx++];
 
 		/* Write build-ids */
-		buildid_sec->offset = lseek64(fd, 0, SEEK_CUR);
+		buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
 		err = dsos__write_buildid_table(fd);
 		if (err < 0) {
 			pr_debug("failed to write buildid table\n");
 			goto out_free;
 		}
-		buildid_sec->size = lseek64(fd, 0, SEEK_CUR) -
-					    buildid_sec->offset;
+		buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
+					  buildid_sec->offset;
 		dsos__cache_build_ids();
 	}
 
-	lseek64(fd, sec_start, SEEK_SET);
+	lseek(fd, sec_start, SEEK_SET);
 	err = do_write(fd, feat_sec, sec_size);
 	if (err < 0)
 		pr_debug("failed to write feature section\n");
@@ -513,7 +512,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 		pr_debug("failed to write perf header\n");
 		return err;
 	}
-	lseek64(fd, self->data_offset + self->data_size, SEEK_SET);
+	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
 	self->frozen = 1;
 	return 0;
@@ -567,7 +566,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
 
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
-	lseek64(fd, self->data_offset + self->data_size, SEEK_SET);
+	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
 
 	if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
 		goto out_free;
@@ -641,7 +640,7 @@ static int perf_file_section__process(struct perf_file_section *self,
 				      struct perf_header *ph,
 				      int feat, int fd)
 {
-	if (lseek64(fd, self->offset, SEEK_SET) < 0) {
+	if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) {
 		pr_debug("Failed to lseek to %Ld offset for feature %d, "
 			 "continuing...\n", self->offset, feat);
 		return 0;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 74cbc64a3a3c..0de7258e70a5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,4 +1,3 @@
-#define _LARGEFILE64_SOURCE
 #define _FILE_OFFSET_BITS 64
 
 #include <linux/kernel.h>
@@ -15,7 +14,7 @@ static int perf_session__open(struct perf_session *self, bool force)
 {
 	struct stat input_stat;
 
-	self->fd = open(self->filename, O_RDONLY|O_LARGEFILE);
+	self->fd = open(self->filename, O_RDONLY);
 	if (self->fd < 0) {
 		pr_err("failed to open file: %s", self->filename);
 		if (!strcmp(self->filename, "perf.data"))
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index ca3c26d466f3..7cd1193918c7 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -18,7 +18,7 @@
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
-#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
 
 #include <dirent.h>
 #include <stdio.h>
@@ -83,7 +83,7 @@ static char *read_string(void)
 	char *str = NULL;
 	int size = 0;
 	int i;
-	s64 r;
+	off_t r;
 
 	for (;;) {
 		r = read(input_fd, buf, BUFSIZ);
@@ -117,8 +117,8 @@ static char *read_string(void)
 	i++;
 
 	/* move the file descriptor to the end of the string */
-	r = lseek64(input_fd, -(r - i), SEEK_CUR);
-	if (r < 0)
+	r = lseek(input_fd, -(r - i), SEEK_CUR);
+	if (r == (off_t)-1)
 		die("lseek");
 
 	if (str) {
@@ -282,8 +282,8 @@ static void update_cpu_data_index(int cpu)
 
 static void get_next_page(int cpu)
 {
-	off64_t save_seek;
-	off64_t ret;
+	off_t save_seek;
+	off_t ret;
 
 	if (!cpu_data[cpu].page)
 		return;
@@ -298,17 +298,17 @@ static void get_next_page(int cpu)
 		update_cpu_data_index(cpu);
 
 		/* other parts of the code may expect the pointer to not move */
-		save_seek = lseek64(input_fd, 0, SEEK_CUR);
+		save_seek = lseek(input_fd, 0, SEEK_CUR);
 
-		ret = lseek64(input_fd, cpu_data[cpu].offset, SEEK_SET);
-		if (ret < 0)
+		ret = lseek(input_fd, cpu_data[cpu].offset, SEEK_SET);
+		if (ret == (off_t)-1)
 			die("failed to lseek");
 		ret = read(input_fd, cpu_data[cpu].page, page_size);
 		if (ret < 0)
 			die("failed to read page");
 
 		/* reset the file pointer back */
-		lseek64(input_fd, save_seek, SEEK_SET);
+		lseek(input_fd, save_seek, SEEK_SET);
 
 		return;
 	}

From 2161db969313cb94ffd9377a525fb75c3fee9eeb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 4 Feb 2010 10:22:01 +0100
Subject: [PATCH 254/640] perf tools: Fix session init on non-modular kernels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

perf top and perf record refuses to initialize on non-modular kernels:
refuse to initialize:

 $ perf top -v
  map_groups__set_modules_path_dir: cannot open /lib/modules/2.6.33-rc6-tip-00586-g398dde3-dirty/

Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265223128-11786-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a60ba2ba1044..6882e9fec2d6 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1937,7 +1937,7 @@ int map_groups__create_kernel_maps(struct map_groups *self,
 		return -1;
 
 	if (symbol_conf.use_modules && map_groups__create_modules(self) < 0)
-		return -1;
+		return 0;
 	/*
 	 * Now that we have all the maps created, just set the ->end of them:
 	 */

From f044ba7835b84e69c68b620ca8fa27e5ef67759d Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.yan@oracle.com>
Date: Thu, 4 Feb 2010 08:46:56 +0000
Subject: [PATCH 255/640] Btrfs: fix race between allocate and release extent
 buffer.

Increase extent buffer's reference count while holding the lock.
Otherwise it can race with try_release_extent_buffer.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent_io.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9fd..b177ed319612 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		spin_unlock(&tree->buffer_lock);
 		goto free_eb;
 	}
-	spin_unlock(&tree->buffer_lock);
-
 	/* add one reference for the tree */
 	atomic_inc(&eb->refs);
+	spin_unlock(&tree->buffer_lock);
 	return eb;
 
 free_eb:

From 014e4ac4f7d9c981750491fa40ea35efadc9ed49 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 29 Jan 2010 10:42:11 +0000
Subject: [PATCH 256/640] Btrfs: make error return negative in
 btrfs_sync_file()

It appears the error return should be negative

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae96fdae1f7d..413a30dafcda 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1133,7 +1133,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	}
 	mutex_lock(&dentry->d_inode->i_mutex);
 out:
-	return ret > 0 ? EIO : ret;
+	return ret > 0 ? -EIO : ret;
 }
 
 static const struct vm_operations_struct btrfs_file_vm_ops = {

From d7ce5843bb28ada6845ab2ae8510ba3f12d33154 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Tue, 2 Feb 2010 08:46:44 +0000
Subject: [PATCH 257/640] Btrfs: remove BUG_ON() due to mounting bad filesystem

Mounting a bad filesystem caused a BUG_ON(). The following is steps to
reproduce it.
 # mkfs.btrfs /dev/sda2
 # mount /dev/sda2 /mnt
 # mkfs.btrfs /dev/sda1 /dev/sda2
 (the program says that /dev/sda2 was mounted, and then exits. )
 # umount /mnt
 # mount /dev/sda1 /mnt

At the third step, mkfs.btrfs exited in the way of make filesystem. So the
initialization of the filesystem didn't finish. So the filesystem was bad, and
it caused BUG_ON() when mounting it. But BUG_ON() should be called by the wrong
code, not user's operation, so I think it is a bug of btrfs.

This patch fixes it.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c    | 7 ++++++-
 fs/btrfs/relocation.c | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 87b25543d7d1..2b59201b955c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1982,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_recover_relocation(tree_root);
-		BUG_ON(ret);
+		if (ret < 0) {
+			printk(KERN_WARNING
+			       "btrfs: failed to recover relocation\n");
+			err = -EINVAL;
+			goto fail_trans_kthread;
+		}
 	}
 
 	location.objectid = BTRFS_FS_TREE_OBJECTID;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ed3e4a2ec2c8..ab7ab5318745 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3764,7 +3764,8 @@ out:
 				       BTRFS_DATA_RELOC_TREE_OBJECTID);
 		if (IS_ERR(fs_root))
 			err = PTR_ERR(fs_root);
-		btrfs_orphan_cleanup(fs_root);
+		else
+			btrfs_orphan_cleanup(fs_root);
 	}
 	return err;
 }

From 7a7965f83e89f0be506a96769938a721e4e5ae50 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.yan@oracle.com>
Date: Mon, 1 Feb 2010 02:41:17 +0000
Subject: [PATCH 258/640] Btrfs: Fix oopsen when dropping empty tree.

When dropping a empty tree, walk_down_tree() skips checking
extent information for the tree root. This will triggers a
BUG_ON in walk_up_proc().

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 432a2da4641e..559f72489b3b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5402,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 	int ret;
 
 	while (level >= 0) {
-		if (path->slots[level] >=
-		    btrfs_header_nritems(path->nodes[level]))
-			break;
-
 		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
@@ -5413,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		if (level == 0)
 			break;
 
+		if (path->slots[level] >=
+		    btrfs_header_nritems(path->nodes[level]))
+			break;
+
 		ret = do_walk_down(trans, root, path, wc, &lookup_info);
 		if (ret > 0) {
 			path->slots[level]++;

From efd049fb26a162c3830fd3cb1001fdc09b147f3b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 2 Feb 2010 20:50:10 +0000
Subject: [PATCH 259/640] Btrfs: do not try and lookup the file extent when
 finishing ordered io

When running the following fio job

[torrent]
filename=torrent-test
rw=randwrite
size=4g
filesize=4g
bs=4k
ioengine=sync

you would see long stalls where no work was being done.  That is because we were
doing all this extra work to read in the file extent outside of the transaction,
however in the random io case this ends up hurting us because the file extents
are not there to begin with.  So axe this logic, since we end up reading in the
file extent when we go to update it anyway.  This took the fio job from 11 mb/s
with several ~10 second stalls to 24 mb/s to a couple of 1-2 second stalls.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 46 ++--------------------------------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8cd109972fa6..6782aa19130d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1681,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
  * before we start the transaction.  It limits the amount of btree
  * reads required while inside the transaction.
  */
-static noinline void reada_csum(struct btrfs_root *root,
-				struct btrfs_path *path,
-				struct btrfs_ordered_extent *ordered_extent)
-{
-	struct btrfs_ordered_sum *sum;
-	u64 bytenr;
-
-	sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
-			 list);
-	bytenr = sum->sums[0].bytenr;
-
-	/*
-	 * we don't care about the results, the point of this search is
-	 * just to get the btree leaves into ram
-	 */
-	btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
-}
-
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1709,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct btrfs_path *path;
 	int compressed = 0;
 	int ret;
 
@@ -1717,32 +1698,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	if (!ret)
 		return 0;
 
-	/*
-	 * before we join the transaction, try to do some of our IO.
-	 * This will limit the amount of IO that we have to do with
-	 * the transaction running.  We're unlikely to need to do any
-	 * IO if the file extents are new, the disk_i_size checks
-	 * covers the most common case.
-	 */
-	if (start < BTRFS_I(inode)->disk_i_size) {
-		path = btrfs_alloc_path();
-		if (path) {
-			ret = btrfs_lookup_file_extent(NULL, root, path,
-						       inode->i_ino,
-						       start, 0);
-			ordered_extent = btrfs_lookup_ordered_extent(inode,
-								     start);
-			if (!list_empty(&ordered_extent->list)) {
-				btrfs_release_path(root, path);
-				reada_csum(root, path, ordered_extent);
-			}
-			btrfs_free_path(path);
-		}
-	}
-
-	if (!ordered_extent)
-		ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
+
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list));
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);

From 23b5c50945f2294add0137799400329c0ebba290 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 4 Feb 2010 11:33:03 -0500
Subject: [PATCH 260/640] Btrfs: apply updated fallocate i_size fix

This version of the i_size fix for fallocate makes sure we only update
the i_size when the current fallocate is really operating outside of
i_size.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6782aa19130d..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5799,7 +5799,9 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 		inode->i_ctime = CURRENT_TIME;
 		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-		    cur_offset > inode->i_size) {
+			(actual_len > inode->i_size) &&
+			(cur_offset > inode->i_size)) {
+
 			if (cur_offset > actual_len)
 				i_size  = actual_len;
 			else

From 33c5fd121eabbccc9103daf6cda36941eb3c349f Mon Sep 17 00:00:00 2001
From: David John <davidjon@xenontk.org>
Date: Wed, 27 Jan 2010 15:19:08 +0530
Subject: [PATCH 261/640] drm/i915: Disable SR when more than one pipe is
 enabled

Self Refresh should be disabled on dual plane configs.  Otherwise, as
the SR watermark is not calculated for such configs, switching to non
VGA mode causes FIFO underrun and display flicker.

This fixes Korg Bug #14897.

Signed-off-by: David John <davidjon@xenontk.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: stable@kernel.org
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5f14dfbf715c..12775df1bbfd 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2520,6 +2520,10 @@ static void g4x_update_wm(struct drm_device *dev,  int planea_clock,
 		sr_entries = roundup(sr_entries / cacheline_size, 1);
 		DRM_DEBUG("self-refresh entries: %d\n", sr_entries);
 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
+	} else {
+		/* Turn off self refresh if both pipes are enabled */
+		I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
+					& ~FW_BLC_SELF_EN);
 	}
 
 	DRM_DEBUG("Setting FIFO watermarks - A: %d, B: %d, SR %d\n",
@@ -2563,6 +2567,10 @@ static void i965_update_wm(struct drm_device *dev, int planea_clock,
 			srwm = 1;
 		srwm &= 0x3f;
 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
+	} else {
+		/* Turn off self refresh if both pipes are enabled */
+		I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
+					& ~FW_BLC_SELF_EN);
 	}
 
 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
@@ -2631,6 +2639,10 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
 		if (srwm < 0)
 			srwm = 1;
 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN | (srwm & 0x3f));
+	} else {
+		/* Turn off self refresh if both pipes are enabled */
+		I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
+					& ~FW_BLC_SELF_EN);
 	}
 
 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",

From 67026e03244d76b8f1fa725b079d4182fe8910aa Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Tue, 2 Feb 2010 20:09:04 +0100
Subject: [PATCH 262/640] drm/i915: slow acpi_lid_open() causes flickering - V2

acpi_lid_open() could take up to 10ms on my computer.  Some component is
calling the drm GETCONNECTOR ioctl many times in a row.  This results in
flickering (for example, when starting a video).  Fix it by assuming an
always connected lid status.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_lvds.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 75a9772061cb..b1d0acbae4e4 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -622,6 +622,13 @@ static const struct dmi_system_id bad_lid_status[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"),
 		},
 	},
+	{
+		.ident = "Aspire 1810T",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1810T"),
+		},
+	},
 	{
 		.ident = "PC-81005",
 		.matches = {
@@ -643,7 +650,7 @@ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connect
 {
 	enum drm_connector_status status = connector_status_connected;
 
-	if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
+	if (!dmi_check_system(bad_lid_status) && !acpi_lid_open())
 		status = connector_status_disconnected;
 
 	return status;

From 93533c291a0af78ca57115fc44d2e6c4c9517cd2 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 31 Jan 2010 10:40:48 +0000
Subject: [PATCH 263/640] drm/i915: Fix leak of relocs along do_execbuffer
 error path

Following a gpu hang, we would leak the relocation buffer. So simply
earrange the error path to always free the relocation buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_gem.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1ef7ec4f38fe..be0fd1a63321 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3584,6 +3584,9 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
 	uint32_t reloc_count = 0, i;
 	int ret = 0;
 
+	if (relocs == NULL)
+	    return 0;
+
 	for (i = 0; i < buffer_count; i++) {
 		struct drm_i915_gem_relocation_entry __user *user_relocs;
 		int unwritten;
@@ -3673,7 +3676,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct drm_gem_object *batch_obj;
 	struct drm_i915_gem_object *obj_priv;
 	struct drm_clip_rect *cliprects = NULL;
-	struct drm_i915_gem_relocation_entry *relocs;
+	struct drm_i915_gem_relocation_entry *relocs = NULL;
 	int ret = 0, ret2, i, pinned = 0;
 	uint64_t exec_offset;
 	uint32_t seqno, flush_domains, reloc_index;
@@ -3950,6 +3953,7 @@ err:
 
 	mutex_unlock(&dev->struct_mutex);
 
+pre_mutex_err:
 	/* Copy the updated relocations out regardless of current error
 	 * state.  Failure to update the relocs would mean that the next
 	 * time userland calls execbuf, it would do so with presumed offset
@@ -3964,7 +3968,6 @@ err:
 			ret = ret2;
 	}
 
-pre_mutex_err:
 	drm_free_large(object_list);
 	kfree(cliprects);
 

From feeb2721a7a0bd0cfa5b8847f80aec93aa2cc00d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 3 Feb 2010 21:59:51 +0000
Subject: [PATCH 264/640] igb: make certain to reassign legacy interrupt
 vectors after reset

This change corrects an issue that will cause false hangs when using either
82575 or 82580 in legacy interrupt mode.  The issue is caused when there is
a slow traffic flow and an "ethtool -r" is executed while using legacy or
MSI interrupts.  MSI-X is not affected by this issue due to the fact that
we were already reconfiguring the vectors after reset.

If possible it would be best to push this for net-2.6 since it is resolving
a bug but if that is not possible then net-next-2.6 will be fine.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/igb/igb_main.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 997124d2992a..c881347cb26d 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -421,6 +421,8 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
 		if (tx_queue > IGB_N0_QUEUE)
 			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
+		if (!adapter->msix_entries && msix_vector == 0)
+			msixbm |= E1000_EIMS_OTHER;
 		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
 		q_vector->eims_value = msixbm;
 		break;
@@ -877,7 +879,6 @@ static int igb_request_irq(struct igb_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
-	struct e1000_hw *hw = &adapter->hw;
 	int err = 0;
 
 	if (adapter->msix_entries) {
@@ -909,20 +910,7 @@ static int igb_request_irq(struct igb_adapter *adapter)
 		igb_setup_all_tx_resources(adapter);
 		igb_setup_all_rx_resources(adapter);
 	} else {
-		switch (hw->mac.type) {
-		case e1000_82575:
-			wr32(E1000_MSIXBM(0),
-			     (E1000_EICR_RX_QUEUE0 |
-			      E1000_EICR_TX_QUEUE0 |
-			      E1000_EIMS_OTHER));
-			break;
-		case e1000_82580:
-		case e1000_82576:
-			wr32(E1000_IVAR0, E1000_IVAR_VALID);
-			break;
-		default:
-			break;
-		}
+		igb_assign_vector(adapter->q_vector[0], 0);
 	}
 
 	if (adapter->flags & IGB_FLAG_HAS_MSI) {
@@ -1140,6 +1128,8 @@ int igb_up(struct igb_adapter *adapter)
 	}
 	if (adapter->msix_entries)
 		igb_configure_msix(adapter);
+	else
+		igb_assign_vector(adapter->q_vector[0], 0);
 
 	/* Clear any pending interrupts. */
 	rd32(E1000_ICR);

From 21956b61f594f7924d98240da74bc81c28601fa9 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Tue, 2 Feb 2010 19:58:25 +0100
Subject: [PATCH 265/640] ALSA: ctxfi - fix PTP address initialization

After hours of debugging, I finally found the reason why some source
and runtime combination does not work. The PTP (page table pages)
address must be aligned. I am not sure how much, but alignment to
PAGE_SIZE is sufficient. Also, use ALSA's page allocation routines
to ensure proper virtual -> physical address translation.

Cc: <stable@kernel.org>
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
---
 sound/pci/ctxfi/ctatc.c  | 15 ++-------------
 sound/pci/ctxfi/ctvmem.c | 38 ++++++++++++++++++--------------------
 sound/pci/ctxfi/ctvmem.h |  8 +++++---
 3 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index cb65bd0dd35b..459c1f62783b 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -166,18 +166,7 @@ static void ct_unmap_audio_buffer(struct ct_atc *atc, struct ct_atc_pcm *apcm)
 
 static unsigned long atc_get_ptp_phys(struct ct_atc *atc, int index)
 {
-	struct ct_vm *vm;
-	void *kvirt_addr;
-	unsigned long phys_addr;
-
-	vm = atc->vm;
-	kvirt_addr = vm->get_ptp_virt(vm, index);
-	if (kvirt_addr == NULL)
-		phys_addr = (~0UL);
-	else
-		phys_addr = virt_to_phys(kvirt_addr);
-
-	return phys_addr;
+	return atc->vm->get_ptp_phys(atc->vm, index);
 }
 
 static unsigned int convert_format(snd_pcm_format_t snd_format)
@@ -1669,7 +1658,7 @@ int __devinit ct_atc_create(struct snd_card *card, struct pci_dev *pci,
 	}
 
 	/* Set up device virtual memory management object */
-	err = ct_vm_create(&atc->vm);
+	err = ct_vm_create(&atc->vm, pci);
 	if (err < 0)
 		goto error1;
 
diff --git a/sound/pci/ctxfi/ctvmem.c b/sound/pci/ctxfi/ctvmem.c
index 6b78752e9503..65da6e466f80 100644
--- a/sound/pci/ctxfi/ctvmem.c
+++ b/sound/pci/ctxfi/ctvmem.c
@@ -138,7 +138,7 @@ ct_vm_map(struct ct_vm *vm, struct snd_pcm_substream *substream, int size)
 		return NULL;
 	}
 
-	ptp = vm->ptp[0];
+	ptp = (unsigned long *)vm->ptp[0].area;
 	pte_start = (block->addr >> CT_PAGE_SHIFT);
 	pages = block->size >> CT_PAGE_SHIFT;
 	for (i = 0; i < pages; i++) {
@@ -158,25 +158,25 @@ static void ct_vm_unmap(struct ct_vm *vm, struct ct_vm_block *block)
 }
 
 /* *
- * return the host (kmalloced) addr of the @index-th device
- * page talbe page on success, or NULL on failure.
- * The first returned NULL indicates the termination.
+ * return the host physical addr of the @index-th device
+ * page table page on success, or ~0UL on failure.
+ * The first returned ~0UL indicates the termination.
  * */
-static void *
-ct_get_ptp_virt(struct ct_vm *vm, int index)
+static dma_addr_t
+ct_get_ptp_phys(struct ct_vm *vm, int index)
 {
-	void *addr;
+	dma_addr_t addr;
 
-	addr = (index >= CT_PTP_NUM) ? NULL : vm->ptp[index];
+	addr = (index >= CT_PTP_NUM) ? ~0UL : vm->ptp[index].addr;
 
 	return addr;
 }
 
-int ct_vm_create(struct ct_vm **rvm)
+int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci)
 {
 	struct ct_vm *vm;
 	struct ct_vm_block *block;
-	int i;
+	int i, err = 0;
 
 	*rvm = NULL;
 
@@ -188,23 +188,21 @@ int ct_vm_create(struct ct_vm **rvm)
 
 	/* Allocate page table pages */
 	for (i = 0; i < CT_PTP_NUM; i++) {
-		vm->ptp[i] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!vm->ptp[i])
+		err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV,
+					  snd_dma_pci_data(pci),
+					  PAGE_SIZE, &vm->ptp[i]);
+		if (err < 0)
 			break;
 	}
-	if (!i) {
+	if (err < 0) {
 		/* no page table pages are allocated */
-		kfree(vm);
+		ct_vm_destroy(vm);
 		return -ENOMEM;
 	}
 	vm->size = CT_ADDRS_PER_PAGE * i;
-	/* Initialise remaining ptps */
-	for (; i < CT_PTP_NUM; i++)
-		vm->ptp[i] = NULL;
-
 	vm->map = ct_vm_map;
 	vm->unmap = ct_vm_unmap;
-	vm->get_ptp_virt = ct_get_ptp_virt;
+	vm->get_ptp_phys = ct_get_ptp_phys;
 	INIT_LIST_HEAD(&vm->unused);
 	INIT_LIST_HEAD(&vm->used);
 	block = kzalloc(sizeof(*block), GFP_KERNEL);
@@ -242,7 +240,7 @@ void ct_vm_destroy(struct ct_vm *vm)
 
 	/* free allocated page table pages */
 	for (i = 0; i < CT_PTP_NUM; i++)
-		kfree(vm->ptp[i]);
+		snd_dma_free_pages(&vm->ptp[i]);
 
 	vm->size = 0;
 
diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h
index 01e4fd0386a3..b23adfca4de6 100644
--- a/sound/pci/ctxfi/ctvmem.h
+++ b/sound/pci/ctxfi/ctvmem.h
@@ -22,6 +22,8 @@
 
 #include <linux/mutex.h>
 #include <linux/list.h>
+#include <linux/pci.h>
+#include <sound/memalloc.h>
 
 /* The chip can handle the page table of 4k pages
  * (emu20k1 can handle even 8k pages, but we don't use it right now)
@@ -41,7 +43,7 @@ struct snd_pcm_substream;
 
 /* Virtual memory management object for card device */
 struct ct_vm {
-	void *ptp[CT_PTP_NUM];		/* Device page table pages */
+	struct snd_dma_buffer ptp[CT_PTP_NUM];	/* Device page table pages */
 	unsigned int size;		/* Available addr space in bytes */
 	struct list_head unused;	/* List of unused blocks */
 	struct list_head used;		/* List of used blocks */
@@ -52,10 +54,10 @@ struct ct_vm {
 				   int size);
 	/* Unmap device logical addr area. */
 	void (*unmap)(struct ct_vm *, struct ct_vm_block *block);
-	void *(*get_ptp_virt)(struct ct_vm *vm, int index);
+	dma_addr_t (*get_ptp_phys)(struct ct_vm *vm, int index);
 };
 
-int ct_vm_create(struct ct_vm **rvm);
+int ct_vm_create(struct ct_vm **rvm, struct pci_dev *pci);
 void ct_vm_destroy(struct ct_vm *vm);
 
 #endif /* CTVMEM_H */

From 1b3f720bf033fde1fbb6231f9b156b918c5f68d8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 4 Feb 2010 14:00:41 -0800
Subject: [PATCH 266/640] pktgen: Fix freezing problem

Add missing try_to_freeze() to one of the pktgen_thread_worker() code
paths so that it doesn't block suspend/hibernation.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=15006

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-and-tested-by: Ciprian Dorin Craciun <ciprian.craciun@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index de0c2c726420..2e692afdc55d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3524,6 +3524,7 @@ static int pktgen_thread_worker(void *arg)
 			wait_event_interruptible_timeout(t->queue,
 							 t->control != 0,
 							 HZ/10);
+			try_to_freeze();
 			continue;
 		}
 

From 6f14a668f1a8b715a6e855f4e32705e54a6e86a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 4 Feb 2010 17:57:37 +0900
Subject: [PATCH 267/640] idr: revert misallocation bug fix

Commit 859ddf09743a8cc680af33f7259ccd0fd36bfe9d tried to fix
misallocation bug but broke full bit marking by not clearing
pa[idp->layers] and also is causing X failures due to lookup failure
in drm code.  The cause of the latter hasn't been found yet.  Revert
the fix for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/idr.c b/lib/idr.c
index ba7d37cf7847..1cac726c44bc 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -140,7 +140,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 	id = *starting_id;
  restart:
 	p = idp->top;
-	l = p->layer;
+	l = idp->layers;
+	pa[l--] = NULL;
 	while (1) {
 		/*
 		 * We run around this while until we reach the leaf node...
@@ -154,8 +155,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			oid = id;
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
-			/* did id go over the limit? */
-			if (id >= (1 << (idp->layers * IDR_BITS))) {
+			/* if already at the top layer, we need to grow */
+			if (!(p = pa[l])) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}

From c38c7b64a2747a211c3a6e8e5919ee25ccd474e7 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 4 Feb 2010 17:27:27 +0100
Subject: [PATCH 268/640] drm/radeon/kms: move blit initialization after we
 disabled VGA

VGA might be overwritting VRAM and corrupt our blit shader leading
to corruption, it likely won't happen if you load fbcon right after
radeon. Thanks to Shawn Starr and Andre Maasikas for tracking down
this issue.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600.c  | 12 ++++++------
 drivers/gpu/drm/radeon/rv770.c | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index b833b4b97162..8c94040cb7f2 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1861,6 +1861,12 @@ int r600_startup(struct radeon_device *rdev)
 			return r;
 	}
 	r600_gpu_init(rdev);
+	r = r600_blit_init(rdev);
+	if (r) {
+		r600_blit_fini(rdev);
+		rdev->asic->copy = NULL;
+		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
+	}
 	/* pin copy shader into vram */
 	if (rdev->r600_blit.shader_obj) {
 		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
@@ -2045,12 +2051,6 @@ int r600_init(struct radeon_device *rdev)
 	r = r600_pcie_gart_init(rdev);
 	if (r)
 		return r;
-	r = r600_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
 
 	rdev->accel_working = true;
 	r = r600_startup(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2d465768ac71..06b037eebef5 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -887,6 +887,12 @@ static int rv770_startup(struct radeon_device *rdev)
 			return r;
 	}
 	rv770_gpu_init(rdev);
+	r = r600_blit_init(rdev);
+	if (r) {
+		r600_blit_fini(rdev);
+		rdev->asic->copy = NULL;
+		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
+	}
 	/* pin copy shader into vram */
 	if (rdev->r600_blit.shader_obj) {
 		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
@@ -1055,12 +1061,6 @@ int rv770_init(struct radeon_device *rdev)
 	r = r600_pcie_gart_init(rdev);
 	if (r)
 		return r;
-	r = r600_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
 
 	rdev->accel_working = true;
 	r = rv770_startup(rdev);

From 062b389c8704e539e234cfd67c7e034a514f50bf Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 4 Feb 2010 20:36:39 +0100
Subject: [PATCH 269/640] drm/radeon/kms: fix regression rendering issue on
 R6XX/R7XX

It seems that some R6XX/R7XX silently ignore HDP flush when
programmed through ring, this patch addback an ioctl callback
to allow R6XX/R7XX hw to perform such flush through MMIO in
order to fix a regression. For more details see:

http://bugzilla.kernel.org/show_bug.cgi?id=15186

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600.c        | 15 +++++++++++++++
 drivers/gpu/drm/radeon/radeon.h      |  7 +++++++
 drivers/gpu/drm/radeon/radeon_asic.h | 11 +++++++++++
 drivers/gpu/drm/radeon/radeon_gem.c  |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 8c94040cb7f2..45d565bae71b 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2900,3 +2900,18 @@ int r600_debugfs_mc_info_init(struct radeon_device *rdev)
 	return 0;
 #endif
 }
+
+/**
+ * r600_ioctl_wait_idle - flush host path cache on wait idle ioctl
+ * rdev: radeon device structure
+ * bo: buffer object struct which userspace is waiting for idle
+ *
+ * Some R6XX/R7XX doesn't seems to take into account HDP flush performed
+ * through ring buffer, this leads to corruption in rendering, see
+ * http://bugzilla.kernel.org/show_bug.cgi?id=15186 to avoid this we
+ * directly perform HDP flush by writing register through MMIO.
+ */
+void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo)
+{
+	WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2d5f2bfa7201..37150fc406b5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -661,6 +661,13 @@ struct radeon_asic {
 	void (*hpd_fini)(struct radeon_device *rdev);
 	bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
 	void (*hpd_set_polarity)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
+	/* ioctl hw specific callback. Some hw might want to perform special
+	 * operation on specific ioctl. For instance on wait idle some hw
+	 * might want to perform and HDP flush through MMIO as it seems that
+	 * some R6XX/R7XX hw doesn't take HDP flush into account if programmed
+	 * through ring.
+	 */
+	void (*ioctl_wait_idle)(struct radeon_device *rdev, struct radeon_bo *bo);
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index f2fbd2e4e9df..05ee1aeac3fd 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -117,6 +117,7 @@ static struct radeon_asic r100_asic = {
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
 	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -176,6 +177,7 @@ static struct radeon_asic r300_asic = {
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
 	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 /*
@@ -219,6 +221,7 @@ static struct radeon_asic r420_asic = {
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
 	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -267,6 +270,7 @@ static struct radeon_asic rs400_asic = {
 	.hpd_fini = &r100_hpd_fini,
 	.hpd_sense = &r100_hpd_sense,
 	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -323,6 +327,7 @@ static struct radeon_asic rs600_asic = {
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
 	.hpd_set_polarity = &rs600_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -370,6 +375,7 @@ static struct radeon_asic rs690_asic = {
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
 	.hpd_set_polarity = &rs600_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -421,6 +427,7 @@ static struct radeon_asic rv515_asic = {
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
 	.hpd_set_polarity = &rs600_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 
@@ -463,6 +470,7 @@ static struct radeon_asic r520_asic = {
 	.hpd_fini = &rs600_hpd_fini,
 	.hpd_sense = &rs600_hpd_sense,
 	.hpd_set_polarity = &rs600_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
 };
 
 /*
@@ -504,6 +512,7 @@ void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
 void r600_hpd_set_polarity(struct radeon_device *rdev,
 			   enum radeon_hpd_id hpd);
+extern void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo);
 
 static struct radeon_asic r600_asic = {
 	.init = &r600_init,
@@ -538,6 +547,7 @@ static struct radeon_asic r600_asic = {
 	.hpd_fini = &r600_hpd_fini,
 	.hpd_sense = &r600_hpd_sense,
 	.hpd_set_polarity = &r600_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 };
 
 /*
@@ -582,6 +592,7 @@ static struct radeon_asic rv770_asic = {
 	.hpd_fini = &r600_hpd_fini,
 	.hpd_sense = &r600_hpd_sense,
 	.hpd_set_polarity = &r600_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 };
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0e1325e18534..db8e9a355a01 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -308,6 +308,9 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	}
 	robj = gobj->driver_private;
 	r = radeon_bo_wait(robj, NULL, false);
+	/* callback hw specific functions if any */
+	if (robj->rdev->asic->ioctl_wait_idle)
+		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
 	mutex_lock(&dev->struct_mutex);
 	drm_gem_object_unreference(gobj);
 	mutex_unlock(&dev->struct_mutex);

From 9e5b2af75abc67c13005c706cf95bbbb78f7fddc Mon Sep 17 00:00:00 2001
From: Pauli Nieminen <suokkos@gmail.com>
Date: Thu, 4 Feb 2010 19:20:53 +0200
Subject: [PATCH 270/640] drm/r100/kms: Emit cache flush to the end of command
 buffer. (v2)

Cache flush is required in case CPU is accessing rendered data.

This fixes glean/readPixSanity test case and random rendering
errors in sauerbraten and warzone2100.

v2 Fix comment ordering in r100_fence_ring_emit and remove extra
   defines added in first version.

Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r100.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 11c9a3fe6810..626e79023e30 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -354,11 +354,17 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
 		return RREG32(RADEON_CRTC2_CRNT_FRAME);
 }
 
+/* Who ever call radeon_fence_emit should call ring_lock and ask
+ * for enough space (today caller are ib schedule and buffer move) */
 void r100_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence)
 {
-	/* Who ever call radeon_fence_emit should call ring_lock and ask
-	 * for enough space (today caller are ib schedule and buffer move) */
+	/* We have to make sure that caches are flushed before
+	 * CPU might read something from VRAM. */
+	radeon_ring_write(rdev, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, RADEON_RB3D_DC_FLUSH_ALL);
+	radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL);
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(rdev, PACKET0(0x1720, 0));
 	radeon_ring_write(rdev, (1 << 16) | (1 << 17));

From d7748bacbbee80b2cc4b690a74d5db2cd84acd7b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 2 Feb 2010 14:40:33 -0800
Subject: [PATCH 271/640] ati_pcigart: fix printk format warning

Fix ati_pcigart printk format warning:

drivers/gpu/drm/ati_pcigart.c:115: warning: format '%Lx' expects type 'long long unsigned int', but argument 3 has type 'dma_addr_t'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ati_pcigart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c
index a1fce68e3bbe..17be051b7aa3 100644
--- a/drivers/gpu/drm/ati_pcigart.c
+++ b/drivers/gpu/drm/ati_pcigart.c
@@ -113,7 +113,7 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga
 
 		if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) {
 			DRM_ERROR("fail to set dma mask to 0x%Lx\n",
-				  gart_info->table_mask);
+				  (unsigned long long)gart_info->table_mask);
 			ret = 1;
 			goto done;
 		}

From 94cf6434a1bc5958d5da3be62f1272792dada2bf Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 2 Feb 2010 14:40:29 -0800
Subject: [PATCH 272/640] drivers/gpu/drm/radeon/radeon_combios.c: fix warning

drivers/gpu/drm/radeon/radeon_combios.c: In function 'radeon_combios_get_lvds_info':
drivers/gpu/drm/radeon/radeon_combios.c:893: warning: comparison is always false due to limited range of data type

Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_combios.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 579c8920e081..e7b19440102e 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -971,8 +971,7 @@ struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder
 			 lvds->native_mode.vdisplay);
 
 		lvds->panel_vcc_delay = RBIOS16(lcd_info + 0x2c);
-		if (lvds->panel_vcc_delay > 2000 || lvds->panel_vcc_delay < 0)
-			lvds->panel_vcc_delay = 2000;
+		lvds->panel_vcc_delay = min_t(u16, lvds->panel_vcc_delay, 2000);
 
 		lvds->panel_pwr_delay = RBIOS8(lcd_info + 0x24);
 		lvds->panel_digon_delay = RBIOS16(lcd_info + 0x38) & 0xf;

From 655efd3dc92cd0d37292157178d33deb0430aeaa Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Tue, 2 Feb 2010 11:51:45 +0100
Subject: [PATCH 273/640] drm/radeon/kms: don't call suspend path before
 cleaning up GPU

In suspend path we unmap the GART table while in cleaning up
path we will unbind buffer and thus try to write to unmapped
GART leading to oops. In order to avoid this we don't call the
suspend path in cleanup path. Cleanup path is clever enough
to desactive GPU like the suspend path is doing, thus this was
redondant.

Tested on: RV370, R420, RV515, RV570, RV610, RV770 (all PCIE)

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r100.c   |  4 +---
 drivers/gpu/drm/radeon/r300.c   |  5 ++---
 drivers/gpu/drm/radeon/r420.c   |  3 +--
 drivers/gpu/drm/radeon/r520.c   |  3 +--
 drivers/gpu/drm/radeon/r600.c   | 21 +++++++++++++--------
 drivers/gpu/drm/radeon/radeon.h |  1 +
 drivers/gpu/drm/radeon/rs400.c  |  2 --
 drivers/gpu/drm/radeon/rs600.c  |  2 --
 drivers/gpu/drm/radeon/rs690.c  |  2 --
 drivers/gpu/drm/radeon/rv515.c  |  4 +---
 drivers/gpu/drm/radeon/rv770.c  | 12 ++++++------
 11 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 626e79023e30..c0d4650cdb79 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3375,7 +3375,6 @@ int r100_suspend(struct radeon_device *rdev)
 
 void r100_fini(struct radeon_device *rdev)
 {
-	r100_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -3487,13 +3486,12 @@ int r100_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		r100_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCI)
 			r100_pci_gart_fini(rdev);
-		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 0051d11b907c..e699e6d053dd 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1327,7 +1327,6 @@ int r300_suspend(struct radeon_device *rdev)
 
 void r300_fini(struct radeon_device *rdev)
 {
-	r300_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -1418,15 +1417,15 @@ int r300_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		r300_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCIE)
 			rv370_pcie_gart_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCI)
 			r100_pci_gart_fini(rdev);
-		radeon_irq_kms_fini(rdev);
+		radeon_agp_fini(rdev);
 		rdev->accel_working = false;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 4526faaacca8..d9373246c97f 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -389,16 +389,15 @@ int r420_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		r420_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCIE)
 			rv370_pcie_gart_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCI)
 			r100_pci_gart_fini(rdev);
 		radeon_agp_fini(rdev);
-		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 9a189072f2b9..ddf5731eba0d 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -294,13 +294,12 @@ int r520_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		rv515_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		rv370_pcie_gart_fini(rdev);
 		radeon_agp_fini(rdev);
-		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 45d565bae71b..9661a469f3bd 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1654,6 +1654,12 @@ void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
 	rdev->cp.align_mask = 16 - 1;
 }
 
+void r600_cp_fini(struct radeon_device *rdev)
+{
+	r600_cp_stop(rdev);
+	radeon_ring_fini(rdev);
+}
+
 
 /*
  * GPU scratch registers helpers function.
@@ -2055,9 +2061,11 @@ int r600_init(struct radeon_device *rdev)
 	rdev->accel_working = true;
 	r = r600_startup(rdev);
 	if (r) {
-		r600_suspend(rdev);
+		dev_err(rdev->dev, "disabling GPU acceleration\n");
+		r600_cp_fini(rdev);
 		r600_wb_fini(rdev);
-		radeon_ring_fini(rdev);
+		r600_irq_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		r600_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
@@ -2083,20 +2091,17 @@ int r600_init(struct radeon_device *rdev)
 
 void r600_fini(struct radeon_device *rdev)
 {
-	/* Suspend operations */
-	r600_suspend(rdev);
-
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
+	r600_cp_fini(rdev);
+	r600_wb_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_irq_kms_fini(rdev);
-	radeon_ring_fini(rdev);
-	r600_wb_fini(rdev);
 	r600_pcie_gart_fini(rdev);
+	radeon_agp_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_clocks_fini(rdev);
-	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
 	kfree(rdev->bios);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 37150fc406b5..f57480ba1355 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1150,6 +1150,7 @@ extern bool r600_card_posted(struct radeon_device *rdev);
 extern void r600_cp_stop(struct radeon_device *rdev);
 extern void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
 extern int r600_cp_resume(struct radeon_device *rdev);
+extern void r600_cp_fini(struct radeon_device *rdev);
 extern int r600_count_pipe_bits(uint32_t val);
 extern int r600_gart_clear_page(struct radeon_device *rdev, int i);
 extern int r600_mc_wait_for_idle(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 9f5418983e2a..eeeb0d6d7a44 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -448,7 +448,6 @@ int rs400_suspend(struct radeon_device *rdev)
 
 void rs400_fini(struct radeon_device *rdev)
 {
-	rs400_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -527,7 +526,6 @@ int rs400_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		rs400_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index d5255751e7b3..c3818562a13e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -610,7 +610,6 @@ int rs600_suspend(struct radeon_device *rdev)
 
 void rs600_fini(struct radeon_device *rdev)
 {
-	rs600_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -689,7 +688,6 @@ int rs600_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		rs600_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index cd31da913771..06e2771aee5a 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -676,7 +676,6 @@ int rs690_suspend(struct radeon_device *rdev)
 
 void rs690_fini(struct radeon_device *rdev)
 {
-	rs690_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -756,7 +755,6 @@ int rs690_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		rs690_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 62756717b044..0e1e6b8632b8 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -537,7 +537,6 @@ void rv515_set_safe_registers(struct radeon_device *rdev)
 
 void rv515_fini(struct radeon_device *rdev)
 {
-	rv515_suspend(rdev);
 	r100_cp_fini(rdev);
 	r100_wb_fini(rdev);
 	r100_ib_fini(rdev);
@@ -615,13 +614,12 @@ int rv515_init(struct radeon_device *rdev)
 	if (r) {
 		/* Somethings want wront with the accel init stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
-		rv515_suspend(rdev);
 		r100_cp_fini(rdev);
 		r100_wb_fini(rdev);
 		r100_ib_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		rv370_pcie_gart_fini(rdev);
 		radeon_agp_fini(rdev);
-		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 06b037eebef5..c9320e727986 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1065,9 +1065,11 @@ int rv770_init(struct radeon_device *rdev)
 	rdev->accel_working = true;
 	r = rv770_startup(rdev);
 	if (r) {
-		rv770_suspend(rdev);
+		dev_err(rdev->dev, "disabling GPU acceleration\n");
+		r600_cp_fini(rdev);
 		r600_wb_fini(rdev);
-		radeon_ring_fini(rdev);
+		r600_irq_fini(rdev);
+		radeon_irq_kms_fini(rdev);
 		rv770_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
 	}
@@ -1089,13 +1091,11 @@ int rv770_init(struct radeon_device *rdev)
 
 void rv770_fini(struct radeon_device *rdev)
 {
-	rv770_suspend(rdev);
-
 	r600_blit_fini(rdev);
+	r600_cp_fini(rdev);
+	r600_wb_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_irq_kms_fini(rdev);
-	radeon_ring_fini(rdev);
-	r600_wb_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);

From 23fff28a9b0529869bffef8aab0d3f350dd3b5a4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Feb 2010 11:57:42 +1000
Subject: [PATCH 274/640] drm/radeon/kms: disable HDMI audio for now on
 rv710/rv730

Support isn't correct yet and we are getting green tinges on the
displays.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600_audio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index 99e2c3891a7d..b1c1d3433454 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -35,7 +35,7 @@
  */
 static int r600_audio_chipset_supported(struct radeon_device *rdev)
 {
-	return rdev->family >= CHIP_R600
+	return (rdev->family >= CHIP_R600 && rdev->family < CHIP_RV710)
 		|| rdev->family == CHIP_RS600
 		|| rdev->family == CHIP_RS690
 		|| rdev->family == CHIP_RS740;

From 624ab4f87e99f10ea3b45e76039c477fd4d7a7e6 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@davers480.(none)>
Date: Wed, 27 Jan 2010 16:07:15 +1000
Subject: [PATCH 275/640] drm/radeon/kms: make initial state of load detect
 property correct.

this was incorrect on my rs480.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 55266416fa47..2d8e5a70f284 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1343,7 +1343,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
 				radeon_connector->dac_load_detect = false;
 			drm_connector_attach_property(&radeon_connector->base,
 						      rdev->mode_info.load_detect_property,
-						      1);
+						      radeon_connector->dac_load_detect);
 			drm_connector_attach_property(&radeon_connector->base,
 						      rdev->mode_info.tv_std_property,
 						      radeon_combios_get_tv_info(rdev));

From 2717568e7c44fe7dc3f4f52ea823811cfeede2b5 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 4 Feb 2010 06:57:58 +0000
Subject: [PATCH 276/640] usb: r8a66597-hcd: Flush the D-cache for the pipe-in
 transfer buffers.

This implements the same D-cache flushing logic for r8a66597-hcd as
Catalin's isp1760 (http://patchwork.kernel.org/patch/76391/) change,
with the same note applying here as well:

    When the HDC driver writes the data to the transfer buffers it
    pollutes the D-cache (unlike DMA drivers where the device writes
    the data). If the corresponding pages get mapped into user space,
    there are no additional cache flushing operations performed and
    this causes random user space faults on architectures with
    separate I and D caches (Harvard) or those with aliasing D-cache.

This fixes up crashes during USB boot on SH7724 and others:

	http://marc.info/?l=linux-sh&m=126439837308912&w=2

Reported-by: Goda Yusuke <goda.yusuke@renesas.com>
Tested-by: Goda Yusuke <goda.yusuke@renesas.com>
Cc: stable@kernel.org
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
---
 drivers/usb/host/r8a66597-hcd.c | 37 ++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 0ceec123ddfd..50a3e2d6a6cc 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -35,7 +35,9 @@
 #include <linux/usb.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/mm.h>
 #include <linux/irq.h>
+#include <asm/cacheflush.h>
 
 #include "../core/hcd.h"
 #include "r8a66597.h"
@@ -820,6 +822,26 @@ static void enable_r8a66597_pipe(struct r8a66597 *r8a66597, struct urb *urb,
 	enable_r8a66597_pipe_dma(r8a66597, dev, pipe, urb);
 }
 
+static void r8a66597_urb_done(struct r8a66597 *r8a66597, struct urb *urb,
+			      int status)
+__releases(r8a66597->lock)
+__acquires(r8a66597->lock)
+{
+	if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) != PIPE_CONTROL) {
+		void *ptr;
+
+		for (ptr = urb->transfer_buffer;
+		     ptr < urb->transfer_buffer + urb->transfer_buffer_length;
+		     ptr += PAGE_SIZE)
+			flush_dcache_page(virt_to_page(ptr));
+	}
+
+	usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597), urb);
+	spin_unlock(&r8a66597->lock);
+	usb_hcd_giveback_urb(r8a66597_to_hcd(r8a66597), urb, status);
+	spin_lock(&r8a66597->lock);
+}
+
 /* this function must be called with interrupt disabled */
 static void force_dequeue(struct r8a66597 *r8a66597, u16 pipenum, u16 address)
 {
@@ -838,15 +860,9 @@ static void force_dequeue(struct r8a66597 *r8a66597, u16 pipenum, u16 address)
 		list_del(&td->queue);
 		kfree(td);
 
-		if (urb) {
-			usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597),
-					urb);
+		if (urb)
+			r8a66597_urb_done(r8a66597, urb, -ENODEV);
 
-			spin_unlock(&r8a66597->lock);
-			usb_hcd_giveback_urb(r8a66597_to_hcd(r8a66597), urb,
-					-ENODEV);
-			spin_lock(&r8a66597->lock);
-		}
 		break;
 	}
 }
@@ -1283,10 +1299,7 @@ __releases(r8a66597->lock) __acquires(r8a66597->lock)
 		if (usb_pipeisoc(urb->pipe))
 			urb->start_frame = r8a66597_get_frame(hcd);
 
-		usb_hcd_unlink_urb_from_ep(r8a66597_to_hcd(r8a66597), urb);
-		spin_unlock(&r8a66597->lock);
-		usb_hcd_giveback_urb(hcd, urb, status);
-		spin_lock(&r8a66597->lock);
+		r8a66597_urb_done(r8a66597, urb, status);
 	}
 
 	if (restart) {

From 2c940db250c1610d95ea5331dc819b8bd4db96ae Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 4 Feb 2010 06:58:28 +0000
Subject: [PATCH 277/640] usb: r8a66597-hcd: Fix up spinlock recursion in root
 hub polling.

The current root hub polling code exhibits a spinlock recursion on the
private controller lock. r8a66597_root_hub_control() is called from
r8a66597_timer() which grabs the lock and disables IRQs. The following
chain emerges:

  r8a66597_timer() <-- lock taken
    r8a66597_root_hub_control()
      r8a66597_check_syssts()
        usb_hcd_poll_rh_status() <-- acquires the same lock
	/* insert death here */

The entire chain requires IRQs to be disabled, so we just unlock and
relock around the call to usb_hcd_poll_rh_status() while leaving the
IRQ state unchanged.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
---
 drivers/usb/host/r8a66597-hcd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 50a3e2d6a6cc..bee558aed427 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1022,6 +1022,8 @@ static void start_root_hub_sampling(struct r8a66597 *r8a66597, int port,
 /* this function must be called with interrupt disabled */
 static void r8a66597_check_syssts(struct r8a66597 *r8a66597, int port,
 					u16 syssts)
+__releases(r8a66597->lock)
+__acquires(r8a66597->lock)
 {
 	if (syssts == SE0) {
 		r8a66597_write(r8a66597, ~ATTCH, get_intsts_reg(port));
@@ -1039,7 +1041,9 @@ static void r8a66597_check_syssts(struct r8a66597 *r8a66597, int port,
 			usb_hcd_resume_root_hub(r8a66597_to_hcd(r8a66597));
 	}
 
+	spin_unlock(&r8a66597->lock);
 	usb_hcd_poll_rh_status(r8a66597_to_hcd(r8a66597));
+	spin_lock(&r8a66597->lock);
 }
 
 /* this function must be called with interrupt disabled */

From a17538f93c16f0e15e35dc31eedad87e2d9c5c26 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Feb 2010 13:41:54 +1000
Subject: [PATCH 278/640] drm/radeon/kms: rs400/480 MC setup is different than
 r300.

Boot testing on my rs480 laptop found the MC idle never happened
on startup, a quick check with AMD found the idle bit is in a different
place on the rs4xx than r300.

Implement a new rs400 mc idle function to fix this.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/rs400.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index eeeb0d6d7a44..287fcebfb4e6 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -223,15 +223,31 @@ int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 	return 0;
 }
 
+int rs400_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(0x0150);
+		if (tmp & (1 << 2)) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
 void rs400_gpu_init(struct radeon_device *rdev)
 {
 	/* FIXME: HDP same place on rs400 ? */
 	r100_hdp_reset(rdev);
 	/* FIXME: is this correct ? */
 	r420_pipes_init(rdev);
-	if (r300_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
+	if (rs400_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "rs400: Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen. %08x\n", RREG32(0x150));
 	}
 }
 
@@ -370,8 +386,8 @@ void rs400_mc_program(struct radeon_device *rdev)
 	r100_mc_stop(rdev, &save);
 
 	/* Wait for mc idle */
-	if (r300_mc_wait_for_idle(rdev))
-		dev_warn(rdev->dev, "Wait MC idle timeout before updating MC.\n");
+	if (rs400_mc_wait_for_idle(rdev))
+		dev_warn(rdev->dev, "rs400: Wait MC idle timeout before updating MC.\n");
 	WREG32(R_000148_MC_FB_LOCATION,
 		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
 		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));

From 5ff55717674470b96562f931f778c878080755b7 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Feb 2010 13:57:03 +1000
Subject: [PATCH 279/640] drm/radeon/kms: fix r300 vram width calculations

This was incorrect according to the docs and the UMS driver does
it like this.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r300.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index e699e6d053dd..43b55a030b4d 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -506,11 +506,14 @@ void r300_vram_info(struct radeon_device *rdev)
 
 	/* DDR for all card after R300 & IGP */
 	rdev->mc.vram_is_ddr = true;
+
 	tmp = RREG32(RADEON_MEM_CNTL);
-	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
-		rdev->mc.vram_width = 128;
-	} else {
-		rdev->mc.vram_width = 64;
+	tmp &= R300_MEM_NUM_CHANNELS_MASK;
+	switch (tmp) {
+	case 0: rdev->mc.vram_width = 64; break;
+	case 1: rdev->mc.vram_width = 128; break;
+	case 2: rdev->mc.vram_width = 256; break;
+	default:  rdev->mc.vram_width = 128; break;
 	}
 
 	r100_vram_init_sizes(rdev);

From 38fd2c6ff526e6a59edfa8e08f6f0724646784c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <zajec5@gmail.com>
Date: Thu, 28 Jan 2010 18:16:30 +0100
Subject: [PATCH 280/640] drm/radeon/kms: suspend and resume audio stuff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes FDO bug #26214

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600.c       | 8 ++++++++
 drivers/gpu/drm/radeon/r600_audio.c | 3 +--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 9661a469f3bd..29faebc105df 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1950,6 +1950,13 @@ int r600_resume(struct radeon_device *rdev)
 		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
 		return r;
 	}
+
+	r = r600_audio_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: audio resume failed\n");
+		return r;
+	}
+
 	return r;
 }
 
@@ -1957,6 +1964,7 @@ int r600_suspend(struct radeon_device *rdev)
 {
 	int r;
 
+	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r600_cp_stop(rdev);
 	rdev->cp.ready = false;
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index b1c1d3433454..0dcb6904c4ff 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -261,7 +261,6 @@ void r600_audio_fini(struct radeon_device *rdev)
 	if (!r600_audio_chipset_supported(rdev))
 		return;
 
-	WREG32_P(R600_AUDIO_ENABLE, 0x0, ~0x81000000);
-
 	del_timer(&rdev->audio_timer);
+	WREG32_P(R600_AUDIO_ENABLE, 0x0, ~0x81000000);
 }

From 5ecaafdbf44b1ba400b746c60c401d54c7ee0863 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Fri, 5 Feb 2010 01:24:34 -0500
Subject: [PATCH 281/640] kprobes: Add mcount to the kprobes blacklist

Since mcount function can be called from everywhere,
it should be blacklisted. Moreover, the "mcount" symbol
is a special symbol name. So, it is better to put it in
the generic blacklist.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100205062433.3745.36726.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/kprobes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c3340e836c37..ccec774c716d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -94,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
 	{"native_get_debugreg",},
 	{"irq_entries_start",},
 	{"common_interrupt",},
+	{"mcount",},	/* mcount can be called from everywhere */
 	{NULL}    /* Terminator */
 };
 

From 1eb6dc7dabcb4aa762d96f4f6978f3ef86321d68 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Thu, 4 Feb 2010 22:21:47 +0200
Subject: [PATCH 282/640] ALSA: hda - Delay switching to polling mode if an
 interrupt was missing

My sound codec seems sometimes (very rarely) to omit interrupts (ALC268)
However, interrupt mode still works.
Thus if we get timeout, poll the codec once.

If we get 3 such polls in a row, then switch to polling mode.

This patch is maybe an bandaid, but this might be a workaround for hardware bug.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 565de38a3fc7..d853e2c33bb7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -426,6 +426,7 @@ struct azx {
 
 	/* flags */
 	int position_fix;
+	int poll_count;
 	unsigned int running :1;
 	unsigned int initialized :1;
 	unsigned int single_cmd :1;
@@ -506,7 +507,7 @@ static char *driver_short_names[] __devinitdata = {
 #define get_azx_dev(substream) (substream->runtime->private_data)
 
 static int azx_acquire_irq(struct azx *chip, int do_disconnect);
-
+static int azx_send_cmd(struct hda_bus *bus, unsigned int val);
 /*
  * Interface for HD codec
  */
@@ -664,11 +665,12 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus,
 {
 	struct azx *chip = bus->private_data;
 	unsigned long timeout;
+	int do_poll = 0;
 
  again:
 	timeout = jiffies + msecs_to_jiffies(1000);
 	for (;;) {
-		if (chip->polling_mode) {
+		if (chip->polling_mode || do_poll) {
 			spin_lock_irq(&chip->reg_lock);
 			azx_update_rirb(chip);
 			spin_unlock_irq(&chip->reg_lock);
@@ -676,6 +678,9 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus,
 		if (!chip->rirb.cmds[addr]) {
 			smp_rmb();
 			bus->rirb_error = 0;
+
+			if (!do_poll)
+				chip->poll_count = 0;
 			return chip->rirb.res[addr]; /* the last value */
 		}
 		if (time_after(jiffies, timeout))
@@ -688,6 +693,16 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus,
 		}
 	}
 
+	if (!chip->polling_mode && chip->poll_count < 2) {
+		snd_printdd(SFX "azx_get_response timeout, "
+			   "polling the codec once: last cmd=0x%08x\n",
+			   chip->last_cmd[addr]);
+		do_poll = 1;
+		chip->poll_count++;
+		goto again;
+	}
+
+
 	if (!chip->polling_mode) {
 		snd_printk(KERN_WARNING SFX "azx_get_response timeout, "
 			   "switching to polling mode: last cmd=0x%08x\n",

From 9492837a6f54b069e13e40e3c89898bb8837a386 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Thu, 4 Feb 2010 22:26:37 +0200
Subject: [PATCH 283/640] ALSA: cosmetic: make hda intel interrupt name
 consistent with others

This renames the interrupt name in /proc/interrupt.
HDA Intel -> hda_intel

This also eliminates space from the name, probably helping some
parsers.
Don't think anybody depends on this name in userspace

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index d853e2c33bb7..b8faa6dc5abe 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2058,7 +2058,7 @@ static int azx_acquire_irq(struct azx *chip, int do_disconnect)
 {
 	if (request_irq(chip->pci->irq, azx_interrupt,
 			chip->msi ? 0 : IRQF_SHARED,
-			"HDA Intel", chip)) {
+			"hda_intel", chip)) {
 		printk(KERN_ERR "hda-intel: unable to grab IRQ %d, "
 		       "disabling device\n", chip->pci->irq);
 		if (do_disconnect)

From 9d4c7464458770d309169f7a7ce1ea6f8a4a7de5 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Fri, 5 Feb 2010 10:19:41 +0100
Subject: [PATCH 284/640] ALSA: ice1724 - aureon - fix wm8770 volume offset

The volume register is from 0..0x7f and 0..0x1a range is mute.
Also, fix mute combining in wm_vol_put(). The wrong behaviour was
noticed by Peter Christensen.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
---
 sound/pci/ice1712/aureon.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c
index 765d7bd4c3d4..9e66f6d306f8 100644
--- a/sound/pci/ice1712/aureon.c
+++ b/sound/pci/ice1712/aureon.c
@@ -703,11 +703,13 @@ static void wm_set_vol(struct snd_ice1712 *ice, unsigned int index, unsigned sho
 {
 	unsigned char nvol;
 
-	if ((master & WM_VOL_MUTE) || (vol & WM_VOL_MUTE))
+	if ((master & WM_VOL_MUTE) || (vol & WM_VOL_MUTE)) {
 		nvol = 0;
-	else
+	} else {
 		nvol = ((vol % WM_VOL_CNT) * (master % WM_VOL_CNT)) /
 								WM_VOL_MAX;
+		nvol += 0x1b;
+	}
 
 	wm_put(ice, index, nvol);
 	wm_put_nocache(ice, index, 0x180 | nvol);
@@ -778,7 +780,7 @@ static int wm_master_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
 	for (ch = 0; ch < 2; ch++) {
 		unsigned int vol = ucontrol->value.integer.value[ch];
 		if (vol > WM_VOL_MAX)
-			continue;
+			vol = WM_VOL_MAX;
 		vol |= spec->master[ch] & WM_VOL_MUTE;
 		if (vol != spec->master[ch]) {
 			int dac;
@@ -834,8 +836,8 @@ static int wm_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *
 	for (i = 0; i < voices; i++) {
 		unsigned int vol = ucontrol->value.integer.value[i];
 		if (vol > WM_VOL_MAX)
-			continue;
-		vol |= spec->vol[ofs+i];
+			vol = WM_VOL_MAX;
+		vol |= spec->vol[ofs+i] & WM_VOL_MUTE;
 		if (vol != spec->vol[ofs+i]) {
 			spec->vol[ofs+i] = vol;
 			idx  = WM_DAC_ATTEN + ofs + i;

From ae54abed636d18f7939c965f21ad126001dbe34c Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Fri, 5 Feb 2010 13:11:45 +0100
Subject: [PATCH 285/640] cfq-iosched: split seeky coop queues after one slice

Currently we split seeky coop queues after 1s, which is too big. Below patch
marks seeky coop queue split_coop flag after one slice. After that, if new
requests come in, the queues will be splitted. Patch is suggested by Corrado.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Reviewed-by: Corrado Zoccolo <czoccolo@gmail.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/cfq-iosched.c | 49 +++++++++++++++------------------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 17b768d0d42f..023f4e69a337 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -42,16 +42,13 @@ static const int cfq_hist_divisor = 4;
  */
 #define CFQ_MIN_TT		(2)
 
-/*
- * Allow merged cfqqs to perform this amount of seeky I/O before
- * deciding to break the queues up again.
- */
-#define CFQQ_COOP_TOUT		(HZ)
-
 #define CFQ_SLICE_SCALE		(5)
 #define CFQ_HW_QUEUE_MIN	(5)
 #define CFQ_SERVICE_SHIFT       12
 
+#define CFQQ_SEEK_THR		8 * 1024
+#define CFQQ_SEEKY(cfqq)	((cfqq)->seek_mean > CFQQ_SEEK_THR)
+
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
 #define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private2)
@@ -137,7 +134,6 @@ struct cfq_queue {
 	u64 seek_total;
 	sector_t seek_mean;
 	sector_t last_request_pos;
-	unsigned long seeky_start;
 
 	pid_t pid;
 
@@ -314,6 +310,7 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
 	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
 	CFQ_CFQQ_FLAG_coop,		/* cfqq is shared */
+	CFQ_CFQQ_FLAG_split_coop,	/* shared cfqq will be splitted */
 	CFQ_CFQQ_FLAG_deep,		/* sync cfqq experienced large depth */
 	CFQ_CFQQ_FLAG_wait_busy,	/* Waiting for next request */
 };
@@ -342,6 +339,7 @@ CFQ_CFQQ_FNS(prio_changed);
 CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
 CFQ_CFQQ_FNS(coop);
+CFQ_CFQQ_FNS(split_coop);
 CFQ_CFQQ_FNS(deep);
 CFQ_CFQQ_FNS(wait_busy);
 #undef CFQ_CFQQ_FNS
@@ -1565,6 +1563,15 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	cfq_clear_cfqq_wait_request(cfqq);
 	cfq_clear_cfqq_wait_busy(cfqq);
 
+	/*
+	 * If this cfqq is shared between multiple processes, check to
+	 * make sure that those processes are still issuing I/Os within
+	 * the mean seek distance.  If not, it may be time to break the
+	 * queues apart again.
+	 */
+	if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
+		cfq_mark_cfqq_split_coop(cfqq);
+
 	/*
 	 * store what was left of this slice, if the queue idled/timed out
 	 */
@@ -1663,9 +1670,6 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 		return cfqd->last_position - blk_rq_pos(rq);
 }
 
-#define CFQQ_SEEK_THR		8 * 1024
-#define CFQQ_SEEKY(cfqq)	((cfqq)->seek_mean > CFQQ_SEEK_THR)
-
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			       struct request *rq, bool for_preempt)
 {
@@ -3000,19 +3004,6 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	total = cfqq->seek_total + (cfqq->seek_samples/2);
 	do_div(total, cfqq->seek_samples);
 	cfqq->seek_mean = (sector_t)total;
-
-	/*
-	 * If this cfqq is shared between multiple processes, check to
-	 * make sure that those processes are still issuing I/Os within
-	 * the mean seek distance.  If not, it may be time to break the
-	 * queues apart again.
-	 */
-	if (cfq_cfqq_coop(cfqq)) {
-		if (CFQQ_SEEKY(cfqq) && !cfqq->seeky_start)
-			cfqq->seeky_start = jiffies;
-		else if (!CFQQ_SEEKY(cfqq))
-			cfqq->seeky_start = 0;
-	}
 }
 
 /*
@@ -3453,14 +3444,6 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
 	return cic_to_cfqq(cic, 1);
 }
 
-static int should_split_cfqq(struct cfq_queue *cfqq)
-{
-	if (cfqq->seeky_start &&
-	    time_after(jiffies, cfqq->seeky_start + CFQQ_COOP_TOUT))
-		return 1;
-	return 0;
-}
-
 /*
  * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
  * was the last process referring to said cfqq.
@@ -3469,9 +3452,9 @@ static struct cfq_queue *
 split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
 {
 	if (cfqq_process_refs(cfqq) == 1) {
-		cfqq->seeky_start = 0;
 		cfqq->pid = current->pid;
 		cfq_clear_cfqq_coop(cfqq);
+		cfq_clear_cfqq_split_coop(cfqq);
 		return cfqq;
 	}
 
@@ -3510,7 +3493,7 @@ new_queue:
 		/*
 		 * If the queue was seeky for too long, break it apart.
 		 */
-		if (cfq_cfqq_coop(cfqq) && should_split_cfqq(cfqq)) {
+		if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
 			cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
 			cfqq = split_cfqq(cic, cfqq);
 			if (!cfqq)

From 531c2dc70d339c5dfa8c3eb628c3459dc6f3a075 Mon Sep 17 00:00:00 2001
From: "Stephen M. Cameron" <scameron@beardog.cce.hp.com>
Date: Fri, 5 Feb 2010 13:14:04 +0100
Subject: [PATCH 286/640] cciss: Make cciss_seq_show handle holes in the
 h->drv[] array

It is possible (and expected) for there to be holes in the h->drv[]
array, that is, some elements may be NULL pointers.  cciss_seq_show
needs to be made aware of this possibility to avoid an Oops.

To reproduce the Oops which this fixes:

1) Create two "arrays" in the Array Configuratino Utility and
   several logical drives on each array.
2) cat /proc/driver/cciss/cciss* in an infinite loop
3) delete some of the logical drives in the first "array."

Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/cciss.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 873e594860d3..9291614ac6b7 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -337,6 +337,9 @@ static int cciss_seq_show(struct seq_file *seq, void *v)
 	if (*pos > h->highest_lun)
 		return 0;
 
+	if (drv == NULL) /* it's possible for h->drv[] to have holes. */
+		return 0;
+
 	if (drv->heads == 0)
 		return 0;
 

From 3b9447fb7fa1829731290e64ef928d4f6461310a Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Fri, 5 Feb 2010 00:55:33 +0200
Subject: [PATCH 287/640] ASoC: pandora: Add APLL supply to fix audio output

Pandora's external DAC is using 256*Fs output from the TWL4030
codec, and TWL4030 needs to have APLL enabled for it's 256*Fs
output to function.

Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@nokia.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/omap/omap3pandora.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c
index 71b2c161158d..68980c19a3bc 100644
--- a/sound/soc/omap/omap3pandora.c
+++ b/sound/soc/omap/omap3pandora.c
@@ -145,6 +145,7 @@ static const struct snd_soc_dapm_widget omap3pandora_in_dapm_widgets[] = {
 };
 
 static const struct snd_soc_dapm_route omap3pandora_out_map[] = {
+	{"PCM DAC", NULL, "APLL Enable"},
 	{"Headphone Amplifier", NULL, "PCM DAC"},
 	{"Line Out", NULL, "PCM DAC"},
 	{"Headphone Jack", NULL, "Headphone Amplifier"},

From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 5 Feb 2010 16:09:11 +1100
Subject: [PATCH 288/640] percpu: add __percpu for sparse

This is to make the annotation of percpu variables during the next merge
window less painfull.

Extracted from a patch by Rusty Russell.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab4a695..188fcae10a99 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__

From 73d2eaac8a3f1ec1d6d0a80ea7302a439ca9b933 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Fri, 5 Feb 2010 01:42:43 -0500
Subject: [PATCH 289/640] CS5536: apply pci quirk for BIOS SMBUS bug

The new cs5535-* drivers use PCI header config info rather than MSRs to
determine the memory region to use for things like GPIOs and MFGPTs.  As
anticipated, we've run into a buggy BIOS:

[    0.081818] pci 0000:00:14.0: reg 10: [io  0x6000-0x7fff]
[    0.081906] pci 0000:00:14.0: reg 14: [io  0x6100-0x61ff]
[    0.082015] pci 0000:00:14.0: reg 18: [io  0x6200-0x63ff]
[    0.082917] pci 0000:00:14.2: reg 20: [io  0xe000-0xe00f]
[    0.083551] pci 0000:00:15.0: reg 10: [mem 0xa0010000-0xa0010fff]
[    0.084436] pci 0000:00:15.1: reg 10: [mem 0xa0011000-0xa0011fff]
[    0.088816] PCI: pci_cache_line_size set to 32 bytes
[    0.088938] pci 0000:00:14.0: address space collision: [io 0x6100-0x61ff] already in use
[    0.089052] pci 0000:00:14.0: can't reserve [io  0x6100-0x61ff]

This is a Soekris board, and its BIOS sets the size of the PCI ISA bridge
device's BAR0 to 8k.  In reality, it should be 8 bytes (BAR0 is used for
SMBus stuff).  This quirk checks for an incorrect size, and resets it
accordingly.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Tested-by: Leigh Porter <leigh@leighporter.org>
Tested-by: Jens Rottmann <JRottmann@LiPPERTEmbedded.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/quirks.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c74694345b6e..d58b94030ef3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -338,6 +338,23 @@ static void __devinit quirk_s3_64M(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3,	PCI_DEVICE_ID_S3_868,		quirk_s3_64M);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_S3,	PCI_DEVICE_ID_S3_968,		quirk_s3_64M);
 
+/*
+ * Some CS5536 BIOSes (for example, the Soekris NET5501 board w/ comBIOS
+ * ver. 1.33  20070103) don't set the correct ISA PCI region header info.
+ * BAR0 should be 8 bytes; instead, it may be set to something like 8k
+ * (which conflicts w/ BAR1's memory range).
+ */
+static void __devinit quirk_cs5536_vsa(struct pci_dev *dev)
+{
+	if (pci_resource_len(dev, 0) != 8) {
+		struct resource *res = &dev->resource[0];
+		res->end = res->start + 8 - 1;
+		dev_info(&dev->dev, "CS5536 ISA bridge bug detected "
+				"(incorrect header); workaround applied.\n");
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, quirk_cs5536_vsa);
+
 static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region,
 	unsigned size, int nr, const char *name)
 {

From 1c010ff8912cbc08d80e865aab9c32b6b00c527d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 5 Feb 2010 17:48:13 +0100
Subject: [PATCH 290/640] i2c-tiny-usb: Fix on big-endian systems

The functionality bit vector is always returned as a little-endian
32-bit number by the device, so it must be byte-swapped to the host
endianness.

On the other hand, the delay value is handled by the USB stack, so no
byte swapping is needed on our side.

This fixes bug #15105:
http://bugzilla.kernel.org/show_bug.cgi?id=15105

Reported-by: Jens Richter <jens@richter-stutensee.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Jens Richter <jens@richter-stutensee.de>
Cc: Till Harbaum <till@harbaum.org>
Cc: stable@kernel.org
---
 drivers/i2c/busses/i2c-tiny-usb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
index b1c050ff311d..e29b6d5ba8ef 100644
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/module.h>
+#include <linux/types.h>
 
 /* include interfaces to usb layer */
 #include <linux/usb.h>
@@ -31,8 +32,8 @@
 #define CMD_I2C_IO_END		(1<<1)
 
 /* i2c bit delay, default is 10us -> 100kHz */
-static int delay = 10;
-module_param(delay, int, 0);
+static unsigned short delay = 10;
+module_param(delay, ushort, 0);
 MODULE_PARM_DESC(delay, "bit delay in microseconds, "
 		 "e.g. 10 for 100kHz (default is 100kHz)");
 
@@ -109,7 +110,7 @@ static int usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 
 static u32 usb_func(struct i2c_adapter *adapter)
 {
-	u32 func;
+	__le32 func;
 
 	/* get functionality from adapter */
 	if (usb_read(adapter, CMD_GET_FUNC, 0, 0, &func, sizeof(func)) !=
@@ -118,7 +119,7 @@ static u32 usb_func(struct i2c_adapter *adapter)
 		return 0;
 	}
 
-	return func;
+	return le32_to_cpu(func);
 }
 
 /* This is the actual algorithm we define */
@@ -216,8 +217,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface,
 		 "i2c-tiny-usb at bus %03d device %03d",
 		 dev->usb_dev->bus->busnum, dev->usb_dev->devnum);
 
-	if (usb_write(&dev->adapter, CMD_SET_DELAY,
-		      cpu_to_le16(delay), 0, NULL, 0) != 0) {
+	if (usb_write(&dev->adapter, CMD_SET_DELAY, delay, 0, NULL, 0) != 0) {
 		dev_err(&dev->adapter.dev,
 			"failure setting delay to %dus\n", delay);
 		retval = -EIO;

From 15c697ce1c5b408c5e20dcdc6aea2968d1125b75 Mon Sep 17 00:00:00 2001
From: Michael Poole <mdpoole@troilus.org>
Date: Fri, 5 Feb 2010 12:23:43 -0500
Subject: [PATCH 291/640] Bluetooth: Keep a copy of each HID device's report
 descriptor

The report descriptor is read by user space (via the Service
Discovery Protocol), so it is only available during the ioctl
to connect. However, the HID probe function that needs the
descriptor might not be called until a specific module is
loaded. Keep a copy of the descriptor so it is available for
later use.

Signed-off-by: Michael Poole <mdpoole@troilus.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hidp/core.c | 49 +++++++++++++++++++--------------------
 net/bluetooth/hidp/hidp.h |  4 +++-
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 6cf526d06e21..fc6ec1e72652 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -703,29 +703,9 @@ static void hidp_close(struct hid_device *hid)
 static int hidp_parse(struct hid_device *hid)
 {
 	struct hidp_session *session = hid->driver_data;
-	struct hidp_connadd_req *req = session->req;
-	unsigned char *buf;
-	int ret;
 
-	buf = kmalloc(req->rd_size, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (copy_from_user(buf, req->rd_data, req->rd_size)) {
-		kfree(buf);
-		return -EFAULT;
-	}
-
-	ret = hid_parse_report(session->hid, buf, req->rd_size);
-
-	kfree(buf);
-
-	if (ret)
-		return ret;
-
-	session->req = NULL;
-
-	return 0;
+	return hid_parse_report(session->hid, session->rd_data,
+			session->rd_size);
 }
 
 static int hidp_start(struct hid_device *hid)
@@ -770,12 +750,24 @@ static int hidp_setup_hid(struct hidp_session *session,
 	bdaddr_t src, dst;
 	int err;
 
+	session->rd_data = kzalloc(req->rd_size, GFP_KERNEL);
+	if (!session->rd_data)
+		return -ENOMEM;
+
+	if (copy_from_user(session->rd_data, req->rd_data, req->rd_size)) {
+		err = -EFAULT;
+		goto fault;
+	}
+	session->rd_size = req->rd_size;
+
 	hid = hid_allocate_device();
-	if (IS_ERR(hid))
-		return PTR_ERR(hid);
+	if (IS_ERR(hid)) {
+		err = PTR_ERR(hid);
+		goto fault;
+	}
 
 	session->hid = hid;
-	session->req = req;
+
 	hid->driver_data = session;
 
 	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
@@ -806,6 +798,10 @@ failed:
 	hid_destroy_device(hid);
 	session->hid = NULL;
 
+fault:
+	kfree(session->rd_data);
+	session->rd_data = NULL;
+
 	return err;
 }
 
@@ -900,6 +896,9 @@ unlink:
 		session->hid = NULL;
 	}
 
+	kfree(session->rd_data);
+	session->rd_data = NULL;
+
 purge:
 	skb_queue_purge(&session->ctrl_transmit);
 	skb_queue_purge(&session->intr_transmit);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index faf3d74c3586..a4e215d50c10 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -154,7 +154,9 @@ struct hidp_session {
 	struct sk_buff_head ctrl_transmit;
 	struct sk_buff_head intr_transmit;
 
-	struct hidp_connadd_req *req;
+	/* Report descriptor */
+	__u8 *rd_data;
+	uint rd_size;
 };
 
 static inline void hidp_schedule(struct hidp_session *session)

From 85f8d3e5faea8bd36c3e5196f8334f7db45e19b2 Mon Sep 17 00:00:00 2001
From: Ray Copeland <ray.copeland@aprius.com>
Date: Fri, 5 Feb 2010 19:58:35 +0100
Subject: [PATCH 292/640] hwmon: (adt7462) Wrong ADT7462_VOLT_COUNT

The #define ADT7462_VOLT_COUNT is wrong, it should be 13 not 12. All the
for loops that use this as a limit count are of the typical form, "for
(n = 0; n < ADT7462_VOLT_COUNT; n++)", so to loop through all voltages
w/o missing the last one it is necessary for the count to be one greater
than it is.  (Specifically, you will miss the +1.5V 3GPIO input with count
= 12 vs. 13.)

Signed-off-by: Ray Copeland <ray.copeland@aprius.com>
Acked-by: "Darrick J. Wong" <djwong@us.ibm.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: stable@kernel.org
---
 drivers/hwmon/adt7462.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c
index a31e77c776ae..b8156b4893bb 100644
--- a/drivers/hwmon/adt7462.c
+++ b/drivers/hwmon/adt7462.c
@@ -179,7 +179,7 @@ static const unsigned short normal_i2c[] = { 0x58, 0x5C, I2C_CLIENT_END };
  *
  * Some, but not all, of these voltages have low/high limits.
  */
-#define ADT7462_VOLT_COUNT	12
+#define ADT7462_VOLT_COUNT	13
 
 #define ADT7462_VENDOR		0x41
 #define ADT7462_DEVICE		0x62

From 197027e6ef830d60e10f76efc8d12bf3b6c35db5 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 5 Feb 2010 19:58:36 +0100
Subject: [PATCH 293/640] hwmon: (lm78) Request I/O ports individually for
 probing

Different motherboards have different PNP declarations for LM78/LM79
chips. Some declare the whole range of I/O ports (8 ports), some
declare only the useful ports (2 ports at offset 5) and some declare
fancy ranges, for example 4 ports at offset 4. To properly handle all
cases, request all ports individually for probing. After we have
determined that we really have an LM78 or LM79 chip, the useful port
range will be requested again, as a single block.

This fixes the driver on the Olivetti M3000 DT 540, at least.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: stable@kernel.org
---
 drivers/hwmon/lm78.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index cadcbd90ff3b..72ff2c4e757d 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -851,17 +851,16 @@ static struct lm78_data *lm78_update_device(struct device *dev)
 static int __init lm78_isa_found(unsigned short address)
 {
 	int val, save, found = 0;
+	int port;
 
-	/* We have to request the region in two parts because some
-	   boards declare base+4 to base+7 as a PNP device */
-	if (!request_region(address, 4, "lm78")) {
-		pr_debug("lm78: Failed to request low part of region\n");
-		return 0;
-	}
-	if (!request_region(address + 4, 4, "lm78")) {
-		pr_debug("lm78: Failed to request high part of region\n");
-		release_region(address, 4);
-		return 0;
+	/* Some boards declare base+0 to base+7 as a PNP device, some base+4
+	 * to base+7 and some base+5 to base+6. So we better request each port
+	 * individually for the probing phase. */
+	for (port = address; port < address + LM78_EXTENT; port++) {
+		if (!request_region(port, 1, "lm78")) {
+			pr_debug("lm78: Failed to request port 0x%x\n", port);
+			goto release;
+		}
 	}
 
 #define REALLY_SLOW_IO
@@ -925,8 +924,8 @@ static int __init lm78_isa_found(unsigned short address)
 			val & 0x80 ? "LM79" : "LM78", (int)address);
 
  release:
-	release_region(address + 4, 4);
-	release_region(address, 4);
+	for (port--; port >= address; port--)
+		release_region(port, 1);
 	return found;
 }
 

From b0bcdd3cd0adb85a7686b396ba50493871b1135c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 5 Feb 2010 19:58:36 +0100
Subject: [PATCH 294/640] hwmon: (w83781d) Request I/O ports individually for
 probing

Different motherboards have different PNP declarations for
W83781D/W83782D chips. Some declare the whole range of I/O ports (8
ports), some declare only the useful ports (2 ports at offset 5) and
some declare fancy ranges, for example 4 ports at offset 4. To
properly handle all cases, request all ports individually for probing.
After we have determined that we really have a W83781D or W83782D
chip, the useful port range will be requested again, as a single
block.

I did not see a board which needs this yet, but I know of one for lm78
driver and I'd like to keep the logic of these two drivers in sync.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: stable@kernel.org
---
 drivers/hwmon/w83781d.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 05f9225b6f94..32d4adee73db 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -1793,17 +1793,17 @@ static int __init
 w83781d_isa_found(unsigned short address)
 {
 	int val, save, found = 0;
+	int port;
 
-	/* We have to request the region in two parts because some
-	   boards declare base+4 to base+7 as a PNP device */
-	if (!request_region(address, 4, "w83781d")) {
-		pr_debug("w83781d: Failed to request low part of region\n");
-		return 0;
-	}
-	if (!request_region(address + 4, 4, "w83781d")) {
-		pr_debug("w83781d: Failed to request high part of region\n");
-		release_region(address, 4);
-		return 0;
+	/* Some boards declare base+0 to base+7 as a PNP device, some base+4
+	 * to base+7 and some base+5 to base+6. So we better request each port
+	 * individually for the probing phase. */
+	for (port = address; port < address + W83781D_EXTENT; port++) {
+		if (!request_region(port, 1, "w83781d")) {
+			pr_debug("w83781d: Failed to request port 0x%x\n",
+				 port);
+			goto release;
+		}
 	}
 
 #define REALLY_SLOW_IO
@@ -1877,8 +1877,8 @@ w83781d_isa_found(unsigned short address)
 			val == 0x30 ? "W83782D" : "W83781D", (int)address);
 
  release:
-	release_region(address + 4, 4);
-	release_region(address, 4);
+	for (port--; port >= address; port--)
+		release_region(port, 1);
 	return found;
 }
 

From b953df7c70740cd7593072ebec77a8f658505630 Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Fri, 5 Feb 2010 21:52:37 +0800
Subject: [PATCH 295/640] dmaengine: correct onstack wait_queue_head
 declaration

Use DECLARE_WAIT_QUEUE_HEAD_ONSTACK to make lockdep happy

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmatest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 8b905161fbf4..948d563941c9 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -467,7 +467,7 @@ err_srcs:
 
 	if (iterations > 0)
 		while (!kthread_should_stop()) {
-			DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit);
+			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
 			interruptible_sleep_on(&wait_dmatest_exit);
 		}
 

From bd6b0bf87d8cf3d9cfeadeb12dbf5449e3e50765 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 5 Feb 2010 10:26:27 +0100
Subject: [PATCH 296/640] ocfs2: Fix contiguousness check in
 ocfs2_try_to_merge_extent_map()

The wrong member was compared in the continguousness check.

Acked-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/extent_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index d35a27f4523e..5328529e7fd2 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -192,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 		emi->ei_clusters += ins->ei_clusters;
 		return 1;
 	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
-		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
+		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 		   ins->ei_flags == emi->ei_flags) {
 		emi->ei_phys = ins->ei_phys;
 		emi->ei_cpos = ins->ei_cpos;

From f12f98dba6ea1517cd7fbb912208893b9c014c15 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 5 Feb 2010 13:14:00 -0500
Subject: [PATCH 297/640] cifs: fix length calculation for converted unicode
 readdir names

cifs_from_ucs2 returns the length of the converted name, including the
length of the NULL terminator. We don't want to include the NULL
terminator in the dentry name length however since that'll throw off the
hash calculation for the dentry cache.

I believe that this is the root cause of several problems that have
cropped up recently that seem to be papered over with the "noserverino"
mount option. More confirmation of that would be good, but this is
clearly a bug and it fixes at least one reproducible problem that
was reported.

This patch fixes at least this reproducer in this kernel.org bug:

    http://bugzilla.kernel.org/show_bug.cgi?id=15088#c12

Reported-by: Bjorn Tore Sund <bjorn.sund@it.uib.no>
Acked-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f84062f9a985..f5618f8cc462 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -666,6 +666,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 					   min(len, max_len), nlt,
 					   cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
+		pqst->len -= nls_nullsize(nlt);
 	} else {
 		pqst->name = filename;
 		pqst->len = len;

From 301a6a317797ca362951ea21da397c05236f0070 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 6 Feb 2010 07:08:53 +0000
Subject: [PATCH 298/640] [CIFS] Maximum username length check in session setup
 does not match

Fix length check reported by D. Binderman (see below)

d binderman <dcb314@hotmail.com> wrote:
>
> I just ran the sourceforge tool cppcheck over the source code of the
> new Linux kernel 2.6.33-rc6
>
> It said
>
> [./cifs/sess.c:250]: (error) Buffer access out-of-bounds

May turn out to be harmless, but best to be safe. Note max
username length is defined to 32 due to Linux (Windows
maximum is 20).

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/sess.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7085a6275c4c..aaa9c1c5a5bd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 		/* null user mount */
 		*bcc_ptr = 0;
 		*(bcc_ptr+1) = 0;
-	} else { /* 300 should be long enough for any conceivable user name */
+	} else {
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
-					  300, nls_cp);
+					  MAX_USERNAME_SIZE, nls_cp);
 	}
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
@@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 	/* copy user */
 	if (ses->userName == NULL) {
 		/* BB what about null user mounts - check that we do this BB */
-	} else { /* 300 should be long enough for any conceivable user name */
-		strncpy(bcc_ptr, ses->userName, 300);
+	} else {
+		strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
 	}
-	/* BB improve check for overflow */
-	bcc_ptr += strnlen(ses->userName, 300);
+	bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
 	*bcc_ptr = 0;
 	bcc_ptr++; /* account for null termination */
 

From 6f5a55f1a6c5abee15a0e878e5c74d9f1569b8b0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 5 Feb 2010 16:16:50 -0800
Subject: [PATCH 299/640] Fix potential crash with sys_move_pages

We incorrectly depended on the 'node_state/node_isset()' functions
testing the node range, rather than checking it explicitly.  That's not
reliable, even if it might often happen to work.  So do the proper
explicit test.

Reported-by: Marcus Meissner <meissner@suse.de>
Acked-and-tested-by: Brice Goglin <Brice.Goglin@inria.fr>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/migrate.c b/mm/migrate.c
index efddbf0926b2..9a0db5bbabe4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 				goto out_pm;
 
 			err = -ENODEV;
+			if (node < 0 || node >= MAX_NUMNODES)
+				goto out_pm;
+
 			if (!node_state(node, N_HIGH_MEMORY))
 				goto out_pm;
 

From 29275254caedfedce960cfe6df24b90cb04fe431 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 6 Feb 2010 14:17:12 -0800
Subject: [PATCH 300/640] Linux 2.6.33-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 394aec712c7d..f8e02e9491d0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 33
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*

From 8dd5ca532c2d2c2b85f16bc038ebfff05b8853e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Jan 2010 22:11:38 -0500
Subject: [PATCH 301/640] befs: fix leak

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/befs/linuxvfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 33baf27fac78..34ddda888e63 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -873,6 +873,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	brelse(bh);
 
       unacquire_priv_sbp:
+	kfree(befs_sb->mount_opts.iocharset);
 	kfree(sb->s_fs_info);
 
       unacquire_none:

From 4b06e5b9ad8abb20105b2b25e42c509ebe9b2d76 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Fri, 29 Jan 2010 09:56:22 +0900
Subject: [PATCH 302/640] freeze_bdev: don't deactivate successfully frozen
 MS_RDONLY sb

Thanks Thomas and Christoph for testing and review.
I removed 'smp_wmb()' before up_write from the previous patch,
since up_write() should have necessary ordering constraints.
(I.e. the change of s_frozen is visible to others after up_write)
I'm quite sure the change is harmless but if you are uncomfortable
with Tested-by/Reviewed-by on the modified patch, please remove them.

If MS_RDONLY, freeze_bdev should just up_write(s_umount) instead of
deactivate_locked_super().
Also, keep sb->s_frozen consistent so that remount can check the frozen state.

Otherwise a crash reported here can happen:
http://lkml.org/lkml/2010/1/16/37
http://lkml.org/lkml/2010/1/28/53

This patch should be applied for 2.6.32 stable series, too.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Thomas Backlund <tmb@mandriva.org>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: stable@kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 73d6a735b8f3..d11d0289f3d2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -246,7 +246,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	if (!sb)
 		goto out;
 	if (sb->s_flags & MS_RDONLY) {
-		deactivate_locked_super(sb);
+		sb->s_frozen = SB_FREEZE_TRANS;
+		up_write(&sb->s_umount);
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		return sb;
 	}
@@ -307,7 +308,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	BUG_ON(sb->s_bdev != bdev);
 	down_write(&sb->s_umount);
 	if (sb->s_flags & MS_RDONLY)
-		goto out_deactivate;
+		goto out_unfrozen;
 
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
@@ -321,11 +322,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		}
 	}
 
+out_unfrozen:
 	sb->s_frozen = SB_UNFROZEN;
 	smp_wmb();
 	wake_up(&sb->s_wait_unfrozen);
 
-out_deactivate:
 	if (sb)
 		deactivate_locked_super(sb);
 out_unlock:

From 1e41568d7378d1ba8c64ba137b9ddd00b59f893a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Jan 2010 05:43:08 -0500
Subject: [PATCH 303/640] Take ima_path_check() in nfsd past dentry_open() in
 nfsd_open()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/vfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c194793b642b..325959e264ce 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,6 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
+	host_err = ima_path_check(&(*filp)->f_path,
+				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
 out_nfserr:
 	err = nfserrno(host_err);
 out:
@@ -2127,7 +2129,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 	 */
 	path.mnt = exp->ex_path.mnt;
 	path.dentry = dentry;
-	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
 nfsd_out:
 	return err? nfserrno(err) : 0;
 }

From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2010 15:35:41 -0500
Subject: [PATCH 304/640] fix ima breakage

The "Untangling ima mess, part 2 with counters" patch messed
up the counters.  Based on conversations with Al Viro, this patch
streamlines ima_path_check() by removing the counter maintaince.
The counters are now updated independently, from measuring the file,
in __dentry_open() and alloc_file() by calling ima_counts_get().
ima_path_check() is called from nfsd and do_filp_open().
It also did not measure all files that should have been measured.
Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa]
[AV: add missing nfsd bits]

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |   6 +-
 fs/nfsd/vfs.c                     |   3 +-
 include/linux/ima.h               |   4 +-
 security/integrity/ima/ima_main.c | 236 ++++++++++++------------------
 4 files changed, 97 insertions(+), 152 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 94a5e60779f9..cd77b6375efd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,8 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC));
+			error = ima_path_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1797,8 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC));
+		error = ima_path_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 325959e264ce..32477e3a645c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(&(*filp)->f_path,
-				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
+	host_err = ima_path_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 99dc6d5cf7e5..aa55a8f1f5b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a89f44d5e030..75aee18f6163 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -84,6 +84,36 @@ out:
 	return found;
 }
 
+/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
+ *
+ * When opening a file for read, if the file is already open for write,
+ * the file could change, resulting in a file measurement error.
+ *
+ * Opening a file for write, if the file is already open for read, results
+ * in a time of measure, time of use (ToMToU) error.
+ *
+ * In either case invalidate the PCR.
+ */
+enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
+static void ima_read_write_check(enum iint_pcr_error error,
+				 struct ima_iint_cache *iint,
+				 struct inode *inode,
+				 const unsigned char *filename)
+{
+	switch (error) {
+	case TOMTOU:
+		if (iint->readcount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "ToMToU");
+		break;
+	case OPEN_WRITERS:
+		if (iint->writecount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "open_writers");
+		break;
+	}
+}
+
 /*
  * Update the counts given an fmode_t
  */
@@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
+/*
+ * ima_counts_get - increment file counts
+ *
+ * Maintain read/write counters for all files, but only
+ * invalidate the PCR for measured files:
+ * 	- Opening a file for write when already open for read,
+ *	  results in a time of measure, time of use (ToMToU) error.
+ *	- Opening a file for read when already open for write,
+ * 	  could result in a file measurement error.
+ *
+ */
+void ima_counts_get(struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	fmode_t mode = file->f_mode;
+	struct ima_iint_cache *iint;
+	int rc;
+
+	if (!ima_initialized || !S_ISREG(inode->i_mode))
+		return;
+	iint = ima_iint_find_get(inode);
+	if (!iint)
+		return;
+	mutex_lock(&iint->mutex);
+	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
+	if (rc < 0)
+		goto out;
+
+	if (mode & FMODE_WRITE) {
+		ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name);
+		goto out;
+	}
+	ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name);
+out:
+	ima_inc_counts(iint, file->f_mode);
+	mutex_unlock(&iint->mutex);
+
+	kref_put(&iint->refcount, iint_free);
+}
+
 /*
  * Decrement ima counts
  */
@@ -153,123 +224,6 @@ void ima_file_free(struct file *file)
 	kref_put(&iint->refcount, iint_free);
 }
 
-/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
- *
- * When opening a file for read, if the file is already open for write,
- * the file could change, resulting in a file measurement error.
- *
- * Opening a file for write, if the file is already open for read, results
- * in a time of measure, time of use (ToMToU) error.
- *
- * In either case invalidate the PCR.
- */
-enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
-static void ima_read_write_check(enum iint_pcr_error error,
-				 struct ima_iint_cache *iint,
-				 struct inode *inode,
-				 const unsigned char *filename)
-{
-	switch (error) {
-	case TOMTOU:
-		if (iint->readcount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "ToMToU");
-		break;
-	case OPEN_WRITERS:
-		if (iint->writecount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "open_writers");
-		break;
-	}
-}
-
-static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
-				const unsigned char *filename)
-{
-	int rc = 0;
-
-	ima_inc_counts(iint, file->f_mode);
-
-	rc = ima_collect_measurement(iint, file);
-	if (!rc)
-		ima_store_measurement(iint, file, filename);
-	return rc;
-}
-
-/**
- * ima_path_check - based on policy, collect/store measurement.
- * @path: contains a pointer to the path to be measured
- * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
- *
- * Measure the file being open for readonly, based on the
- * ima_must_measure() policy decision.
- *
- * Keep read/write counters for all files, but only
- * invalidate the PCR for measured files:
- * 	- Opening a file for write when already open for read,
- *	  results in a time of measure, time of use (ToMToU) error.
- *	- Opening a file for read when already open for write,
- * 	  could result in a file measurement error.
- *
- * Always return 0 and audit dentry_open failures.
- * (Return code will be based upon measurement appraisal.)
- */
-int ima_path_check(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-	struct file *file = NULL;
-	int rc;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return 0;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return 0;
-
-	mutex_lock(&iint->mutex);
-
-	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
-	if (rc < 0)
-		goto out;
-
-	if ((mask & MAY_WRITE) || (mask == 0))
-		ima_read_write_check(TOMTOU, iint, inode,
-				     path->dentry->d_name.name);
-
-	if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ)
-		goto out;
-
-	ima_read_write_check(OPEN_WRITERS, iint, inode,
-			     path->dentry->d_name.name);
-	if (!(iint->flags & IMA_MEASURED)) {
-		struct dentry *dentry = dget(path->dentry);
-		struct vfsmount *mnt = mntget(path->mnt);
-
-		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
-				   current_cred());
-		if (IS_ERR(file)) {
-			int audit_info = 0;
-
-			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-					    dentry->d_name.name,
-					    "add_measurement",
-					    "dentry_open failed",
-					    1, audit_info);
-			file = NULL;
-			goto out;
-		}
-		rc = get_path_measurement(iint, file, dentry->d_name.name);
-	}
-out:
-	mutex_unlock(&iint->mutex);
-	if (file)
-		fput(file);
-	kref_put(&iint->refcount, iint_free);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ima_path_check);
-
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
 {
@@ -297,33 +251,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_get - increment file counts
- *
- * - for IPC shm and shmat file.
- * - for nfsd exported files.
- *
- * Increment the counts for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_counts_get(struct file *file)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-	mutex_lock(&iint->mutex);
-	ima_inc_counts(iint, file->f_mode);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-EXPORT_SYMBOL_GPL(ima_counts_get);
-
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
  * @file: pointer to the file to be measured (May be NULL)
@@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
+/**
+ * ima_path_check - based on policy, collect/store measurement.
+ * @file: pointer to the file to be measured
+ * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
+ *
+ * Measure files based on the ima_must_measure() policy decision.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
+ */
+int ima_path_check(struct file *file, int mask)
+{
+	int rc;
+
+	rc = process_measurement(file, file->f_dentry->d_name.name,
+				 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				 PATH_CHECK);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ima_path_check);
+
 static int __init init_ima(void)
 {
 	int error;

From 54bb6552bd9405dc7685653157a4ec260c77a71c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 9 Dec 2009 15:29:01 -0500
Subject: [PATCH 305/640] ima: initialize ima before inodes can be allocated

ima wants to create an inode information struct (iint) when inodes are
allocated.  This means that at least the part of ima which does this
allocation (the allocation is filled with information later) should
before any inodes are created.  To accomplish this we split the ima
initialization routine placing the kmem cache allocator inside a
security_initcall() function.  Since this makes use of radix trees we also
need to make sure that is initialized before security_initcall().

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 init/main.c                       | 2 +-
 security/integrity/ima/ima.h      | 1 -
 security/integrity/ima/ima_iint.c | 9 +++------
 security/integrity/ima/ima_main.c | 1 -
 4 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/init/main.c b/init/main.c
index dac44a9356a5..4cb47a159f02 100644
--- a/init/main.c
+++ b/init/main.c
@@ -657,9 +657,9 @@ asmlinkage void __init start_kernel(void)
 	proc_caches_init();
 	buffer_init();
 	key_init();
+	radix_tree_init();
 	security_init();
 	vfs_caches_init(totalram_pages);
-	radix_tree_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index c41afe6639a0..aa25a7eb2d0e 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -65,7 +65,6 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode,
 			 const char *cause, int result, int info);
 
 /* Internal IMA function definitions */
-void ima_iintcache_init(void);
 int ima_init(void);
 void ima_cleanup(void);
 int ima_fs_init(void);
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index fa592ff1ac1c..0d83edcfc402 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -52,9 +52,6 @@ int ima_inode_alloc(struct inode *inode)
 	struct ima_iint_cache *iint = NULL;
 	int rc = 0;
 
-	if (!ima_initialized)
-		return 0;
-
 	iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
 	if (!iint)
 		return -ENOMEM;
@@ -118,8 +115,6 @@ void ima_inode_free(struct inode *inode)
 {
 	struct ima_iint_cache *iint;
 
-	if (!ima_initialized)
-		return;
 	spin_lock(&ima_iint_lock);
 	iint = radix_tree_delete(&ima_iint_store, (unsigned long)inode);
 	spin_unlock(&ima_iint_lock);
@@ -141,9 +136,11 @@ static void init_once(void *foo)
 	kref_set(&iint->refcount, 1);
 }
 
-void __init ima_iintcache_init(void)
+static int __init ima_iintcache_init(void)
 {
 	iint_cache =
 	    kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0,
 			      SLAB_PANIC, init_once);
+	return 0;
 }
+security_initcall(ima_iintcache_init);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 75aee18f6163..eb1cf6498cc9 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -321,7 +321,6 @@ static int __init init_ima(void)
 {
 	int error;
 
-	ima_iintcache_init();
 	error = ima_init();
 	ima_initialized = 1;
 	return error;

From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:02:40 -0500
Subject: [PATCH 306/640] ima: rename ima_path_check to ima_file_check

ima_path_check actually deals with files!  call it ima_file_check instead.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        | 4 ++--
 fs/nfsd/vfs.c                     | 2 +-
 include/linux/ima.h               | 4 ++--
 security/integrity/ima/ima_main.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index cd77b6375efd..d62fdc875f22 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,7 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(filp, acc_mode);
+			error = ima_file_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1796,7 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(filp, acc_mode);
+		error = ima_file_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 32477e3a645c..97d79eff6b7f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(*filp, access);
+	host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index aa55a8f1f5b9..975837e7d6c0 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index eb1cf6498cc9..b76e1f03ea2b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -14,7 +14,7 @@
  *
  * File: ima_main.c
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
- *	and ima_path_check.
+ *	and ima_file_check.
  */
 #include <linux/module.h>
 #include <linux/file.h>
@@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct file *file, int mask)
+int ima_file_check(struct file *file, int mask)
 {
 	int rc;
 
@@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask)
 				 PATH_CHECK);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ima_path_check);
+EXPORT_SYMBOL_GPL(ima_file_check);
 
 static int __init init_ima(void)
 {

From 1e93d0052d9a6b3d0b382eedceb18b519d603baf Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:02:41 -0500
Subject: [PATCH 307/640] ima: rename PATH_CHECK to FILE_CHECK

With the movement of the ima hooks functions were renamed from *path* to
*file* since they always deal with struct file.  This patch renames some of
the ima internal flags to make them consistent with the rest of the code.

Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/ABI/testing/ima_policy | 12 ++++++------
 security/integrity/ima/ima.h         |  2 +-
 security/integrity/ima/ima_api.c     |  4 ++--
 security/integrity/ima/ima_main.c    |  4 ++--
 security/integrity/ima/ima_policy.c  |  9 ++++++---
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 6434f0df012e..6cd6daefaaed 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -20,7 +20,7 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 
-		base: 	func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION]
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
 			fsmagic:= hex value
 			uid:= decimal value
@@ -40,11 +40,11 @@ Description:
 
 			measure func=BPRM_CHECK
 			measure func=FILE_MMAP mask=MAY_EXEC
-			measure func=INODE_PERM mask=MAY_READ uid=0
+			measure func=FILE_CHECK mask=MAY_READ uid=0
 
 		The default policy measures all executables in bprm_check,
 		all files mmapped executable in file_mmap, and all files
-		open for read by root in inode_permission.
+		open for read by root in do_filp_open.
 
 		Examples of LSM specific definitions:
 
@@ -54,8 +54,8 @@ Description:
 
 			dont_measure obj_type=var_log_t
 			dont_measure obj_type=auditd_log_t
-			measure subj_user=system_u func=INODE_PERM mask=MAY_READ
-			measure subj_role=system_r func=INODE_PERM mask=MAY_READ
+			measure subj_user=system_u func=FILE_CHECK mask=MAY_READ
+			measure subj_role=system_r func=FILE_CHECK mask=MAY_READ
 
 		Smack:
-			measure subj_user=_ func=INODE_PERM mask=MAY_READ
+			measure subj_user=_ func=FILE_CHECK mask=MAY_READ
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index aa25a7eb2d0e..47fb65d1fcbd 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -130,7 +130,7 @@ void iint_free(struct kref *kref);
 void iint_rcu_free(struct rcu_head *rcu);
 
 /* IMA policy related functions */
-enum ima_hooks { PATH_CHECK = 1, FILE_MMAP, BPRM_CHECK };
+enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK };
 
 int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask);
 void ima_init_policy(void);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 3cd58b60afd2..2a5e0bcf3887 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -95,12 +95,12 @@ err_out:
  * ima_must_measure - measure decision based on policy.
  * @inode: pointer to inode to measure
  * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE)
- * @function: calling function (PATH_CHECK, BPRM_CHECK, FILE_MMAP)
+ * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP)
  *
  * The policy is defined in terms of keypairs:
  * 		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
- * 	func: PATH_CHECK | BPRM_CHECK | FILE_MMAP
+ * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP
  * 	mask: contains the permission mask
  *	fsmagic: hex value
  *
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index b76e1f03ea2b..294b005d6520 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -153,7 +153,7 @@ void ima_counts_get(struct file *file)
 	if (!iint)
 		return;
 	mutex_lock(&iint->mutex);
-	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
+	rc = ima_must_measure(iint, inode, MAY_READ, FILE_CHECK);
 	if (rc < 0)
 		goto out;
 
@@ -312,7 +312,7 @@ int ima_file_check(struct file *file, int mask)
 
 	rc = process_measurement(file, file->f_dentry->d_name.name,
 				 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
-				 PATH_CHECK);
+				 FILE_CHECK);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index e1278399b345..4759d0f99335 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -67,7 +67,7 @@ static struct ima_measure_rule_entry default_rules[] = {
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
-	{.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0,
+	{.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = 0,
 	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
 };
 
@@ -282,8 +282,11 @@ static int ima_parse_rule(char *rule, struct ima_measure_rule_entry *entry)
 			break;
 		case Opt_func:
 			audit_log_format(ab, "func=%s ", args[0].from);
-			if (strcmp(args[0].from, "PATH_CHECK") == 0)
-				entry->func = PATH_CHECK;
+			if (strcmp(args[0].from, "FILE_CHECK") == 0)
+				entry->func = FILE_CHECK;
+			/* PATH_CHECK is for backwards compat */
+			else if (strcmp(args[0].from, "PATH_CHECK") == 0)
+				entry->func = FILE_CHECK;
 			else if (strcmp(args[0].from, "FILE_MMAP") == 0)
 				entry->func = FILE_MMAP;
 			else if (strcmp(args[0].from, "BPRM_CHECK") == 0)

From 89068c576bf324ef6fbd50dfc745148f7def202c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Feb 2010 03:07:29 -0500
Subject: [PATCH 308/640] Take ima_file_free() to proper place.

Hooks: Just Say No.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c     | 1 +
 security/security.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 69652c5bd5f0..b98404b54383 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -253,6 +253,7 @@ void __fput(struct file *file)
 	if (file->f_op && file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
+	ima_file_free(file);
 	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
 		cdev_put(inode->i_cdev);
 	fops_put(file->f_op);
diff --git a/security/security.c b/security/security.c
index 24e060be9fa5..122b748d0f4c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -666,8 +666,6 @@ int security_file_alloc(struct file *file)
 void security_file_free(struct file *file)
 {
 	security_ops->file_free_security(file);
-	if (file->f_dentry)
-		ima_file_free(file);
 }
 
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)

From ee11b90b12eb1ec25e1044bac861e90bfd19ec9e Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Date: Sun, 7 Feb 2010 11:46:15 -0200
Subject: [PATCH 309/640] perf top: Fix annotate for userspace

First, for programs and prelinked libraries, annotate code was
fooled by objdump output IPs (src->eip in the code) being
wrongly converted to absolute IPs. In such case there were no
conversion needed, but in

   src->eip = strtoull(src->line, NULL, 16);
   src->eip = map->unmap_ip(map, src->eip); // = eip + map->start - map->pgoff

we were reading absolute address from objdump (e.g. 8048604) and
then almost doubling it, because eip & map->start are
approximately close for small programs.

Needless to say, that later, in record_precise_ip() there was no
matching with real runtime IPs.

And second, like with `perf annotate` the problem with
non-prelinked *.so was that we were doing rip -> objdump address
conversion wrong.

Also, because unlike `perf annotate`, `perf top` code does
annotation based on absolute IPs for performance reasons(*), new
helper for mapping objdump addresse to IP is introduced.

(*) we get samples info in absolute IPs, and since we do lots of
    hit-testing on absolute IPs at runtime in record_precise_ip(), it's
    better to convert objdump addresses to IPs once and do no conversion
    at runtime.

I also had to fix how objdump output is parsed (with hardcoded
8/16 characters format, which was inappropriate for ET_DYN dsos
with small addresses like '4ac')

Also note, that not all objdump output lines has associtated
IPs, e.g. look at source lines here:

    000004ac <my_strlen>:
    extern "C"
    int my_strlen(const char *s)
     4ac:   55                      push   %ebp
     4ad:   89 e5                   mov    %esp,%ebp
     4af:   83 ec 10                sub    $0x10,%esp
    {
        int len = 0;
     4b2:   c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%ebp)
     4b9:   eb 08                   jmp    4c3 <my_strlen+0x17>

        while (*s) {
            ++len;
     4bb:   83 45 fc 01             addl   $0x1,-0x4(%ebp)
            ++s;
     4bf:   83 45 08 01             addl   $0x1,0x8(%ebp)

So we mark them with eip=0, and ignore such lines in annotate
lookup code.

Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru>
[ Note: one hunk of this patch was applied by Mike in 57d8188 ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <1265550376-12665-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 18 +++++++++---------
 tools/perf/util/map.c    |  8 ++++++++
 tools/perf/util/map.h    |  4 ++--
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e4156bc4566d..befa57e2284d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -216,7 +216,7 @@ static void parse_source(struct sym_entry *syme)
 	while (!feof(file)) {
 		struct source_line *src;
 		size_t dummy = 0;
-		char *c;
+		char *c, *sep;
 
 		src = malloc(sizeof(struct source_line));
 		assert(src != NULL);
@@ -235,14 +235,11 @@ static void parse_source(struct sym_entry *syme)
 		*source->lines_tail = src;
 		source->lines_tail = &src->next;
 
-		if (strlen(src->line)>8 && src->line[8] == ':') {
-			src->eip = strtoull(src->line, NULL, 16);
-			src->eip = map->unmap_ip(map, src->eip);
-		}
-		if (strlen(src->line)>8 && src->line[16] == ':') {
-			src->eip = strtoull(src->line, NULL, 16);
-			src->eip = map->unmap_ip(map, src->eip);
-		}
+		src->eip = strtoull(src->line, &sep, 16);
+		if (*sep == ':')
+			src->eip = map__objdump_2ip(map, src->eip);
+		else /* this line has no ip info (e.g. source line) */
+			src->eip = 0;
 	}
 	pclose(file);
 out_assign:
@@ -277,6 +274,9 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
 		goto out_unlock;
 
 	for (line = syme->src->lines; line; line = line->next) {
+		/* skip lines without IP info */
+		if (line->eip == 0)
+			continue;
 		if (line->eip == ip) {
 			line->count[counter]++;
 			break;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index af5805f51314..138e3cb2b727 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -222,3 +222,11 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
 			rip;
 	return addr;
 }
+
+u64 map__objdump_2ip(struct map *map, u64 addr)
+{
+	u64 ip = map->dso->adjust_symbols ?
+			addr :
+			map->unmap_ip(map, addr);	/* RIP -> IP */
+	return ip;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 9cee9c788dbf..86f77cb1d060 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -61,9 +61,9 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip)
 }
 
 
-/* rip -> addr suitable for passing to `objdump --start-address=` */
+/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
 u64 map__rip_2objdump(struct map *map, u64 rip);
-
+u64 map__objdump_2ip(struct map *map, u64 addr);
 
 struct symbol;
 struct mmap_event;

From 5f485364365f00853e5249cb3ae31f876936b552 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 7 Feb 2010 11:46:16 -0200
Subject: [PATCH 310/640] perf top: Use address pattern in lookup_sym_source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because we may have aliases, like __GI___strcoll_l in
/lib64/libc-2.10.2.so that appears in objdump as:

$ objdump --start-address=0x0000003715a86420 \
           --stop-address=0x0000003715a872dc -dS /lib64/libc-2.10.2.so

0000003715a86420 <__strcoll_l>:
  3715a86420:	55                   	push   %rbp
  3715a86421:	48 89 e5             	mov    %rsp,%rbp
  3715a86424:	41 57                	push   %r15
[root@doppio linux-2.6-tip]#

So look for the address exactly at the start of the line instead
so that annotation can work for in these cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Kirill Smelkov <kirr@landau.phys.spbu.ru>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1265550376-12665-2-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index befa57e2284d..c72ab50d65ca 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -202,10 +202,9 @@ static void parse_source(struct sym_entry *syme)
 	len = sym->end - sym->start;
 
 	sprintf(command,
-		"objdump --start-address=0x%016Lx "
-			 "--stop-address=0x%016Lx -dS %s",
-		map__rip_2objdump(map, sym->start),
-		map__rip_2objdump(map, sym->end), path);
+		"objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
+		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
+		BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
 
 	file = popen(command, "r");
 	if (!file)
@@ -292,13 +291,15 @@ static void lookup_sym_source(struct sym_entry *syme)
 {
 	struct symbol *symbol = sym_entry__symbol(syme);
 	struct source_line *line;
-	char pattern[PATH_MAX];
+	const size_t pattern_len = BITS_PER_LONG / 4 + 2;
+	char pattern[pattern_len + 1];
 
-	sprintf(pattern, "<%s>:", symbol->name);
+	sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
+		map__rip_2objdump(syme->map, symbol->start));
 
 	pthread_mutex_lock(&syme->src->lock);
 	for (line = syme->src->lines; line; line = line->next) {
-		if (strstr(line->line, pattern)) {
+		if (memcmp(line->line, pattern, pattern_len) == 0) {
 			syme->src->source = line;
 			break;
 		}

From 076dc4a65a6d99a16979e2c7917e669fb8c91ee5 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Fri, 5 Feb 2010 12:16:47 -0500
Subject: [PATCH 311/640] x86/alternatives: Fix build warning

Fixes these warnings:

 arch/x86/kernel/alternative.c: In function 'alternatives_text_reserved':
 arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast
 arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast
 arch/x86/kernel/alternative.c:405: warning: comparison of distinct pointer types lacks a cast
 arch/x86/kernel/alternative.c:405: warning: comparison of distinct pointer types lacks a cast

Caused by:

  2cfa197: ftrace/alternatives: Introducing *_text_reserved functions

Changes in v2:
  - Use local variables to compare, instead of type casts.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
LKML-Reference: <20100205171647.15750.37221.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/alternative.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3c13284ff86d..e63b80e5861c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -395,12 +395,14 @@ int alternatives_text_reserved(void *start, void *end)
 {
 	struct smp_alt_module *mod;
 	u8 **ptr;
+	u8 *text_start = start;
+	u8 *text_end = end;
 
 	list_for_each_entry(mod, &smp_alt_modules, next) {
-		if (mod->text > end || mod->text_end < start)
+		if (mod->text > text_end || mod->text_end < text_start)
 			continue;
 		for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
-			if (start <= *ptr && end >= *ptr)
+			if (text_start <= *ptr && text_end >= *ptr)
 				return 1;
 	}
 

From 80e1e823989ec44d8e35bdfddadbddcffec90424 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 7 Feb 2010 10:11:23 -0800
Subject: [PATCH 312/640] Fix race in tty_fasync() properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 703625118069 ("tty: fix race in tty_fasync") and
commit b04da8bfdfbb ("fnctl: f_modown should call write_lock_irqsave/
restore") that tried to fix up some of the fallout but was incomplete.

It turns out that we really cannot hold 'tty->ctrl_lock' over calling
__f_setown, because not only did that cause problems with interrupt
disables (which the second commit fixed), it also causes a potential
ABBA deadlock due to lock ordering.

Thanks to Tetsuo Handa for following up on the issue, and running
lockdep to show the problem.  It goes roughly like this:

 - f_getown gets filp->f_owner.lock for reading without interrupts
   disabled, so an interrupt that happens while that lock is held can
   cause a lockdep chain from f_owner.lock -> sighand->siglock.

 - at the same time, the tty->ctrl_lock -> f_owner.lock chain that
   commit 703625118069 introduced, together with the pre-existing
   sighand->siglock -> tty->ctrl_lock chain means that we have a lock
   dependency the other way too.

So instead of extending tty->ctrl_lock over the whole __f_setown() call,
we now just take a reference to the 'pid' structure while holding the
lock, and then release it after having done the __f_setown.  That still
guarantees that 'struct pid' won't go away from under us, which is all
we really ever needed.

Reported-and-tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Américo Wang <xiyou.wangcong@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c | 4 +++-
 fs/fcntl.c            | 6 ++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index c6f3b48be9dd..dcb9083ecde0 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1951,8 +1951,10 @@ static int tty_fasync(int fd, struct file *filp, int on)
 			pid = task_pid(current);
 			type = PIDTYPE_PID;
 		}
-		retval = __f_setown(filp, pid, type, 0);
+		get_pid(pid);
 		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+		retval = __f_setown(filp, pid, type, 0);
+		put_pid(pid);
 		if (retval)
 			goto out;
 	} else {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 5ef953e6f908..97e01dc0d95f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -199,9 +199,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                      int force)
 {
-	unsigned long flags;
-
-	write_lock_irqsave(&filp->f_owner.lock, flags);
+	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
 		put_pid(filp->f_owner.pid);
 		filp->f_owner.pid = get_pid(pid);
@@ -213,7 +211,7 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 			filp->f_owner.euid = cred->euid;
 		}
 	}
-	write_unlock_irqrestore(&filp->f_owner.lock, flags);
+	write_unlock_irq(&filp->f_owner.lock);
 }
 
 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,

From 142698282ceb6811ad3482c218b7292037cb67ff Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@console-pimps.org>
Date: Wed, 27 Jan 2010 20:05:20 +0000
Subject: [PATCH 313/640] sh: Correct the offset of the return address in
 ret_from_exception

The address that ret_from_exception and ret_from_irq will return to is
found in the stack slot for SPC, not PR. This error was causing the
DWARF unwinder to pick up the wrong return address on the stack and then
unwind using the unwind tables for the wrong function.

While I'm here I might as well add CFI annotations for the other
registers since they could be useful when unwinding.

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/entry-common.S | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index f0abd58c3a69..2b15ae60c3a0 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -70,8 +70,14 @@ ret_from_exception:
 	CFI_STARTPROC simple
 	CFI_DEF_CFA r14, 0
 	CFI_REL_OFFSET 17, 64
-	CFI_REL_OFFSET 15, 0
+	CFI_REL_OFFSET 15, 60
 	CFI_REL_OFFSET 14, 56
+	CFI_REL_OFFSET 13, 52
+	CFI_REL_OFFSET 12, 48
+	CFI_REL_OFFSET 11, 44
+	CFI_REL_OFFSET 10, 40
+	CFI_REL_OFFSET 9, 36
+	CFI_REL_OFFSET 8, 32
 	preempt_stop()
 ENTRY(ret_from_irq)
 	!

From 1dca56f13899b9e256f56198026019835aaf9a3a Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@console-pimps.org>
Date: Wed, 27 Jan 2010 20:44:59 +0000
Subject: [PATCH 314/640] sh: Setup frame pointer in handle_exception path

In order to allow the DWARF unwinder to unwind through exceptions we
need to setup the frame pointer register (r14).

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh3/entry.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 3f7e2a22c7c2..29021bc67493 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -365,6 +365,8 @@ handle_exception:
 	 mov.l	@k2, k2		! read out vector and keep in k2
 
 handle_exception_special:
+	setup_frame_reg
+
 	! Setup return address and jump to exception handler
 	mov.l	7f, r9		! fetch return address
 	stc	r2_bank, r0	! k2 (vector)

From 944a3438615da65f11e2559840404a2cac5f65ea Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@console-pimps.org>
Date: Sat, 30 Jan 2010 17:36:20 +0000
Subject: [PATCH 315/640] sh: Don't continue unwinding across interrupts

Unfortunately, due to poor DWARF info in current toolchains, unwinding
through interrutps cannot be done reliably. The problem is that the
DWARF info for function epilogues is wrong.

Take this standard epilogue sequence,

80003cc4:       e3 6f           mov     r14,r15
80003cc6:       26 4f           lds.l   @r15+,pr
80003cc8:       f6 6e           mov.l   @r15+,r14
						<---- interrupt here
80003cca:       f6 6b           mov.l   @r15+,r11
80003ccc:       f6 6a           mov.l   @r15+,r10
80003cce:       f6 69           mov.l   @r15+,r9
80003cd0:       0b 00           rts

If we take an interrupt at the highlighted point, the DWARF info will
bogusly claim that the return address can be found at some offset from
the frame pointer, even though the frame pointer was just restored. The
worst part is if the unwinder finds a text address at the bogus stack
address - unwinding will continue, for a bit, until it finally comes
across an unexpected address on the stack and blows up.

The only solution is to stop unwinding once we've calculated the
function that was executing when the interrupt occurred. This PC can be
easily calculated from pt_regs->pc.

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/dwarf.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 88d28ec3780a..e51168064e56 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -540,6 +540,8 @@ void dwarf_free_frame(struct dwarf_frame *frame)
 	mempool_free(frame, dwarf_frame_pool);
 }
 
+extern void ret_from_irq(void);
+
 /**
  *	dwarf_unwind_stack - unwind the stack
  *
@@ -678,6 +680,24 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	addr = frame->cfa + reg->addr;
 	frame->return_addr = __raw_readl(addr);
 
+	/*
+	 * Ah, the joys of unwinding through interrupts.
+	 *
+	 * Interrupts are tricky - the DWARF info needs to be _really_
+	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
+	 * info. For example, I've seen interrupts occur in epilogues
+	 * just after the frame pointer (r14) had been restored. The
+	 * problem was that the DWARF info claimed that the CFA could be
+	 * reached by using the value of the frame pointer before it was
+	 * restored.
+	 *
+	 * So until the compiler can be trusted to produce reliable
+	 * DWARF info when it really matters, let's stop unwinding once
+	 * we've calculated the function that was interrupted.
+	 */
+	if (prev && prev->pc == (unsigned long)ret_from_irq)
+		frame->return_addr = 0;
+
 	return frame;
 
 bail:

From 1af0b2fc676009d9b5b71a82ea6a3c2b20b7ea56 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@console-pimps.org>
Date: Sat, 30 Jan 2010 17:37:25 +0000
Subject: [PATCH 316/640] sh: Remove superfluous setup_frame_reg call

There's no need to setup the frame pointer again in
call_handle_tlbmiss. The frame pointer will already have been setup in
handle_interrupt.

Signed-off-by: Matt Fleming <matt@console-pimps.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh3/entry.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 29021bc67493..f6a389c996cb 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -132,7 +132,6 @@ ENTRY(tlb_protection_violation_store)
 	 mov	#1, r5
 
 call_handle_tlbmiss:
-	setup_frame_reg
 	mov.l	1f, r0
 	mov	r5, r8
 	mov.l	@r0, r6

From 36350e00696df148507246c817cf6f86329479fd Mon Sep 17 00:00:00 2001
From: Mark Nelson <markn@au1.ibm.com>
Date: Sun, 7 Feb 2010 16:45:12 +0000
Subject: [PATCH 317/640] powerpc/pseries: Fix kexec regression caused by CPPR
 tracking

The code to track the CPPR values added by commit
49bd3647134ea47420067aea8d1401e722bf2aac ("powerpc/pseries: Track previous
CPPR values to correctly EOI interrupts") broke kexec on pseries because
the kexec code in xics.c calls xics_set_cpu_priority() before the IPI has
been EOI'ed. This wasn't a problem previously but it now triggers a BUG_ON
in xics_set_cpu_priority() because os_cppr->index isn't 0.

Fix this problem by setting the index on the CPPR stack to 0 before calling
xics_set_cpu_priority() in xics_teardown_cpu().

Also make it clear that we only want to set the priority when there's just
one CPPR value in the stack, and enforce it by updating the value of
os_cppr->stack[0] rather than os_cppr->stack[os_cppr->index].

While we're at it change the BUG_ON to a WARN_ON.

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Mark Nelson <markn@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/xics.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 1ee66db003be..f5f79196721c 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -784,9 +784,13 @@ static void xics_set_cpu_priority(unsigned char cppr)
 {
 	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
 
-	BUG_ON(os_cppr->index != 0);
+	/*
+	 * we only really want to set the priority when there's
+	 * just one cppr value on the stack
+	 */
+	WARN_ON(os_cppr->index != 0);
 
-	os_cppr->stack[os_cppr->index] = cppr;
+	os_cppr->stack[0] = cppr;
 
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		lpar_cppr_info(cppr);
@@ -821,8 +825,14 @@ void xics_setup_cpu(void)
 
 void xics_teardown_cpu(void)
 {
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
 	int cpu = smp_processor_id();
 
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
 	xics_set_cpu_priority(0);
 
 	/* Clear any pending IPI request */

From 1ca137cdcd4e11af03dbe073d48a470b833a456d Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Feb 2010 19:02:24 +1000
Subject: [PATCH 318/640] drm/radeon/kms: change Kconfig text to reflect the
 new option.

Ingo pointed out that we really don't give the user enough warning to make
a decision here. So revise the Kconfig text with a better warning.

Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/Kconfig | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
index 5982321be4d5..1c02d23f6fcc 100644
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -1,10 +1,14 @@
 config DRM_RADEON_KMS
-	bool "Enable modesetting on radeon by default"
+	bool "Enable modesetting on radeon by default - NEW DRIVER"
 	depends on DRM_RADEON
 	help
-	  Choose this option if you want kernel modesetting enabled by default,
-	  and you have a new enough userspace to support this. Running old
-	  userspaces with this enabled will cause pain.
+	  Choose this option if you want kernel modesetting enabled by default.
+
+	  This is a completely new driver. It's only part of the existing drm
+	  for compatibility reasons. It requires an entirely different graphics
+	  stack above it and works very differently from the old drm stack.
+	  i.e. don't enable this unless you know what you are doing it may
+	  cause issues or bugs compared to the previous userspace driver stack.
 
 	  When kernel modesetting is enabled the IOCTL of radeon/drm
 	  driver are considered as invalid and an error message is printed

From a9f0c381973097462d9688dc26fe66f4f020502e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 7 Feb 2010 23:10:04 -0800
Subject: [PATCH 319/640] Input: psmouse - make sure we don't schedule
 reconnects after cleanup

Set state of the device as "initializing" during and after cleanup
to ensure that unsolicited data from the device is not passed on.
We especially want to avoid processing new device announcements
"0xaa 0x00" that can come up before we perform reconnect operation.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/psmouse-base.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 9774bdfaa482..d8c0c8d6992c 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1141,7 +1141,14 @@ static void psmouse_cleanup(struct serio *serio)
 		psmouse_deactivate(parent);
 	}
 
-	psmouse_deactivate(psmouse);
+	psmouse_set_state(psmouse, PSMOUSE_INITIALIZING);
+
+	/*
+	 * Disable stream mode so cleanup routine can proceed undisturbed.
+	 */
+	if (ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_DISABLE))
+		printk(KERN_WARNING "psmouse.c: Failed to disable mouse on %s\n",
+			psmouse->ps2dev.serio->phys);
 
 	if (psmouse->cleanup)
 		psmouse->cleanup(psmouse);

From a6013411118a6c8c34f1bd8b047b36fdf9711590 Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Mon, 1 Feb 2010 12:15:58 +0100
Subject: [PATCH 320/640] microblaze: Invalidate dcache before enabling it

We found that on write-trough kernel is necessary to do that invalidation.
One WB is possible to use invalidation too.

Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 5372b24ad049..bb8c4b9ccb80 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -54,6 +54,7 @@ void __init setup_arch(char **cmdline_p)
 
 	microblaze_cache_init();
 
+	invalidate_dcache();
 	enable_dcache();
 
 	invalidate_icache();

From adefdceef4c1cefee2678724cd57824c8ca80091 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 1 Feb 2010 10:35:22 -0300
Subject: [PATCH 321/640] V4L/DVB: Fix the risk of an oops at dvb_dmx_release

dvb_dmx_init tries to allocate virtual memory for 2 pointers: filter and feed.

If the second vmalloc fails, filter is freed, but the pointer keeps pointing
to the old place. Later, when dvb_dmx_release() is called, it will try to
free an already freed memory, causing an OOPS.

Reviewed-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_demux.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c
index b78cfb7d1897..a78408e76e75 100644
--- a/drivers/media/dvb/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb/dvb-core/dvb_demux.c
@@ -1246,6 +1246,7 @@ int dvb_dmx_init(struct dvb_demux *dvbdemux)
 	dvbdemux->feed = vmalloc(dvbdemux->feednum * sizeof(struct dvb_demux_feed));
 	if (!dvbdemux->feed) {
 		vfree(dvbdemux->filter);
+		dvbdemux->filter = NULL;
 		return -ENOMEM;
 	}
 	for (i = 0; i < dvbdemux->filternum; i++) {

From bc081cc8693800ebb118cc2cc6a859dd0b45921b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 1 Feb 2010 11:50:42 -0300
Subject: [PATCH 322/640] V4L/DVB: dvb_demux: Don't use vmalloc at
 dvb_dmx_swfilter_packet

As dvb_dmx_swfilter_packet() is protected by a spinlock, it shouldn't sleep.
However, vmalloc() may call sleep. So, move the initialization of
dvb_demux::cnt_storage field to a better place.

Reviewed-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_demux.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c
index a78408e76e75..67f189b7aa1f 100644
--- a/drivers/media/dvb/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb/dvb-core/dvb_demux.c
@@ -426,16 +426,7 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
 		};
 	};
 
-	if (dvb_demux_tscheck) {
-		if (!demux->cnt_storage)
-			demux->cnt_storage = vmalloc(MAX_PID + 1);
-
-		if (!demux->cnt_storage) {
-			printk(KERN_WARNING "Couldn't allocate memory for TS/TEI check. Disabling it\n");
-			dvb_demux_tscheck = 0;
-			goto no_dvb_demux_tscheck;
-		}
-
+	if (demux->cnt_storage) {
 		/* check pkt counter */
 		if (pid < MAX_PID) {
 			if (buf[1] & 0x80)
@@ -454,7 +445,6 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
 		};
 		/* end check */
 	};
-no_dvb_demux_tscheck:
 
 	list_for_each_entry(feed, &demux->feed_list, list_head) {
 		if ((feed->pid != pid) && (feed->pid != 0x2000))
@@ -1258,6 +1248,13 @@ int dvb_dmx_init(struct dvb_demux *dvbdemux)
 		dvbdemux->feed[i].index = i;
 	}
 
+	if (dvb_demux_tscheck) {
+		dvbdemux->cnt_storage = vmalloc(MAX_PID + 1);
+
+		if (!dvbdemux->cnt_storage)
+			printk(KERN_WARNING "Couldn't allocate memory for TS/TEI check. Disabling it\n");
+	}
+
 	INIT_LIST_HEAD(&dvbdemux->frontend_list);
 
 	for (i = 0; i < DMX_TS_PES_OTHER; i++) {

From 691c9ae099b9bcb5c27125af00a4a90120977458 Mon Sep 17 00:00:00 2001
From: Francesco Lavra <francescolavra@interfree.it>
Date: Sun, 7 Feb 2010 09:49:58 -0300
Subject: [PATCH 323/640] V4L/DVB: dvb-core: fix initialization of feeds list
 in demux filter

A DVB demultiplexer device can be used to set up either a PES filter or
a section filter. In the former case, the ts field of the feed union of
struct dmxdev_filter is used, in the latter case the sec field of the
same union is used.
The ts field is a struct list_head, and is currently initialized in the
open() method of the demux device. When for a given demuxer a section
filter is set up, the sec field is played with, thus if a PES filter
needs to be set up after that the ts field will be corrupted, causing a
kernel oops.
This fix moves the list head initialization to
dvb_dmxdev_pes_filter_set(), so that the ts field is properly
initialized every time a PES filter is set up.

Signed-off-by: Francesco Lavra <francescolavra@interfree.it>
Cc: stable <stable@kernel.org>
Reviewed-by: Andy Walls <awalls@radix.net>
Tested-by: hermann pitton <hermann-pitton@arcor.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dmxdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c
index c37790ad92d0..9ddc57909d49 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.c
+++ b/drivers/media/dvb/dvb-core/dmxdev.c
@@ -761,7 +761,6 @@ static int dvb_demux_open(struct inode *inode, struct file *file)
 	dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192);
 	dmxdevfilter->type = DMXDEV_TYPE_NONE;
 	dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_ALLOCATED);
-	INIT_LIST_HEAD(&dmxdevfilter->feed.ts);
 	init_timer(&dmxdevfilter->timer);
 
 	dvbdev->users++;
@@ -887,6 +886,7 @@ static int dvb_dmxdev_pes_filter_set(struct dmxdev *dmxdev,
 	dmxdevfilter->type = DMXDEV_TYPE_PES;
 	memcpy(&dmxdevfilter->params, params,
 	       sizeof(struct dmx_pes_filter_params));
+	INIT_LIST_HEAD(&dmxdevfilter->feed.ts);
 
 	dvb_dmxdev_filter_state_set(dmxdevfilter, DMXDEV_STATE_SET);
 

From f7e7ee36757f68778700cde1aaed89e1d23e59fd Mon Sep 17 00:00:00 2001
From: "austin_zhang@linux.intel.com" <austin_zhang@linux.intel.com>
Date: Fri, 5 Feb 2010 09:02:42 -0800
Subject: [PATCH 324/640] perf record: Fix existing process callgraph symbol

When 'perf record -g' a existing process, even with debuginfo
packages, still cannnot get symbol from 'perf report'.

try:

 perf record -g -p `pidof xxx` -f
 perf report

    68.26%    :1181           b74870f2  [.] 0x000000b74870f2
              |
              |--32.09%-- 0xb73b5b44
              |          0xb7487102
              |          0xb748a4e2
              |          0xb748633d
              |          0xb73b41cd
              |          0xb73b4467
              |          0xb747d531

The reason is: for existing process, in __cmd_record(),
the pid is 0 rather than the existing process id.

Signed-off-by: Austin Zhang <austin_zhang@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4710.10.255.24.35.1265389362.squirrel@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3ad599b12c91..771533ced6a8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -570,7 +570,7 @@ static int __cmd_record(int argc, const char **argv)
 	}
 
 	if (!system_wide && profile_cpu == -1)
-		event__synthesize_thread(pid, process_synthesized_event,
+		event__synthesize_thread(target_pid, process_synthesized_event,
 					 session);
 	else
 		event__synthesize_threads(process_synthesized_event, session);

From ccd4bb1beb3316de4611de24d223ad761b5a7e95 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Mon, 8 Feb 2010 17:39:58 +0000
Subject: [PATCH 325/640] [CIFS] Don't cache timestamps on utimes due to coarse
 granularity

force revalidate of the file when any of the timestamps are set since
some filesytem types do not have finer granularity timestamps and
we can not always detect which file systems round timestamps down
to determine whether we can cache the mtime on setattr
samba bugzilla 3775

Acked-by: Shirish Pargaonkar <sharishp@us.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cf18ee765590..e3fda978f481 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1762,8 +1762,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 
-	if (!rc)
+	if (!rc) {
 		rc = inode_setattr(inode, attrs);
+
+		/* force revalidate when any of these times are set since some
+		   of the fs types (eg ext3, fat) do not have fine enough
+		   time granularity to match protocol, and we do not have a
+		   a way (yet) to query the server fs's time granularity (and
+		   whether it rounds times down).
+		*/
+		if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
+			cifsInode->time = 0;
+	}
 out:
 	kfree(args);
 	kfree(full_path);

From 05507fa2ac8d5e503bcf33ee43329449027d9060 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 5 Feb 2010 13:30:36 -0500
Subject: [PATCH 326/640] cifs: fix dentry hash calculation for
 case-insensitive mounts

case-insensitive mounts shouldn't use full_name_hash(). Make sure we
use the parent dentry's d_hash routine when one is set.

Reported-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/readdir.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f5618f8cc462..c343b14ba2d3 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 
 	cFYI(1, ("For %s", name->name));
 
+	if (parent->d_op && parent->d_op->d_hash)
+		parent->d_op->d_hash(parent, name);
+	else
+		name->hash = full_name_hash(name->name, name->len);
+
 	dentry = d_lookup(parent, name);
 	if (dentry) {
 		/* FIXME: check for inode number changes? */
@@ -671,8 +676,6 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
 		pqst->name = filename;
 		pqst->len = len;
 	}
-	pqst->hash = full_name_hash(pqst->name, pqst->len);
-/*	cFYI(1, ("filldir on %s",pqst->name));  */
 	return rc;
 }
 

From 9edd7ca0a3e3999c260642c92fa008892d82ca6e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 8 Feb 2010 11:16:26 -0800
Subject: [PATCH 327/640] netfilter: nf_conntrack: fix memory corruption with
 multiple namespaces

As discovered by Jon Masters <jonathan@jonmasters.org>, the "untracked"
conntrack, which is located in the data section, might be accidentally
freed when a new namespace is instantiated while the untracked conntrack
is attached to a skb because the reference count it re-initialized.

The best fix would be to use a seperate untracked conntrack per
namespace since it includes a namespace pointer. Unfortunately this is
not possible without larger changes since the namespace is not easily
available everywhere we need it. For now move the untracked conntrack
initialization to the init_net setup function to make sure the reference
count is not re-initialized and handle cleanup in the init_net cleanup
function to make sure namespaces can exit properly while the untracked
conntrack is in use in other namespaces.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0e98c3282d42..37e2b88313f2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net)
 
 static void nf_conntrack_cleanup_init_net(void)
 {
+	/* wait until all references to nf_conntrack_untracked are dropped */
+	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+		schedule();
+
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
 	kmem_cache_destroy(nf_conntrack_cachep);
@@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net)
 		schedule();
 		goto i_see_dead_people;
 	}
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
-		schedule();
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
@@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void)
 	if (ret < 0)
 		goto err_helper;
 
+	/* Set up fake conntrack: to never be deleted, not in any hashes */
+#ifdef CONFIG_NET_NS
+	nf_conntrack_untracked.ct_net = &init_net;
+#endif
+	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	/*  - and look it like as a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+
 	return 0;
 
 err_helper:
@@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net)
 	if (ret < 0)
 		goto err_ecache;
 
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
 	return 0;
 
 err_ecache:

From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Feb 2010 11:16:56 -0800
Subject: [PATCH 328/640] netfilter: nf_conntrack: per netns
 nf_conntrack_cachep

nf_conntrack_cachep is currently shared by all netns instances, but
because of SLAB_DESTROY_BY_RCU special semantics, this is wrong.

If we use a shared slab cache, one object can instantly flight between
one hash table (netns ONE) to another one (netns TWO), and concurrent
reader (doing a lookup in netns ONE, 'finding' an object of netns TWO)
can be fooled without notice, because no RCU grace period has to be
observed between object freeing and its reuse.

We dont have this problem with UDP/TCP slab caches because TCP/UDP
hashtables are global to the machine (and each object has a pointer to
its netns).

If we use per netns conntrack hash tables, we also *must* use per netns
conntrack slab caches, to guarantee an object can not escape from one
namespace to another one.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
[Patrick: added unique slab name allocation]
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/conntrack.h     |  2 ++
 net/netfilter/nf_conntrack_core.c | 39 ++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index ba1ba0c5efd1..aed23b6c8478 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
@@ -28,5 +29,6 @@ struct netns_ct {
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
+	char			*slabname;
 };
 #endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 37e2b88313f2..9de4bd4c0dd7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
-static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	 * Do not use kmem_cache_zalloc(), as this cache uses
 	 * SLAB_DESTROY_BY_RCU.
 	 */
-	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
+	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
 		atomic_dec(&net->ct.count);
@@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	nf_ct_ext_destroy(ct);
 	atomic_dec(&net->ct.count);
 	nf_ct_ext_free(ct);
-	kmem_cache_free(nf_conntrack_cachep, ct);
+	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
@@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void)
 
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
-	kmem_cache_destroy(nf_conntrack_cachep);
 }
 
 static void nf_conntrack_cleanup_net(struct net *net)
@@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+	kfree(net->ct.slabname);
 	free_percpu(net->ct.stat);
 }
 
@@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void)
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
 
-	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
-						sizeof(struct nf_conn),
-						0, SLAB_DESTROY_BY_RCU, NULL);
-	if (!nf_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		ret = -ENOMEM;
-		goto err_cache;
-	}
-
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto err_proto;
@@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void)
 err_helper:
 	nf_conntrack_proto_fini();
 err_proto:
-	kmem_cache_destroy(nf_conntrack_cachep);
-err_cache:
 	return ret;
 }
 
@@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_stat;
 	}
+
+	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+	if (!net->ct.slabname) {
+		ret = -ENOMEM;
+		goto err_slabname;
+	}
+
+	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
+							sizeof(struct nf_conn), 0,
+							SLAB_DESTROY_BY_RCU, NULL);
+	if (!net->ct.nf_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+		ret = -ENOMEM;
+		goto err_cache;
+	}
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1352,6 +1355,10 @@ err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+err_cache:
+	kfree(net->ct.slabname);
+err_slabname:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;

From 13ccdfc2af03e09e60791f7d4bc4ccf53398af7c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 8 Feb 2010 11:17:22 -0800
Subject: [PATCH 329/640] netfilter: nf_conntrack: restrict runtime expect
 hashsize modifications

Expectation hashtable size was simply glued to a variable with no code
to rehash expectations, so it was a bug to allow writing to it.
Make "expect_hashsize" readonly.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_expect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index fdf5d2a1d9b4..4ad7d1d809af 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net)
 #endif /* CONFIG_PROC_FS */
 }
 
-module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
 int nf_conntrack_expect_init(struct net *net)
 {

From 14c7dbe043d01a83a30633ab6b109ba2ac61d9f7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 8 Feb 2010 11:17:43 -0800
Subject: [PATCH 330/640] netfilter: xtables: compat out of scope fix

As per C99 6.2.4(2) when temporary table data goes out of scope,
the behaviour is undefined:

	if (compat) {
		struct foo tmp;
		...
		private = &tmp;
	}
	[dereference private]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c | 4 ++--
 net/ipv4/netfilter/ip_tables.c  | 4 ++--
 net/ipv6/netfilter/ip6_tables.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 06632762ba5f..90203e1b9187 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct arpt_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(NFPROTO_ARP);
 			private = &tmp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 572330a552ef..3ce53cf13d5a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct ipt_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(AF_INET);
 			private = &tmp;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 480d7f8c9802..8a7e0f52e177 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct ip6t_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(AF_INET6);
 			private = &tmp;

From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 8 Feb 2010 11:18:07 -0800
Subject: [PATCH 331/640] netfilter: nf_conntrack: fix hash resizing with
 namespaces

As noticed by Jon Masters <jonathan@jonmasters.org>, the conntrack hash
size is global and not per namespace, but modifiable at runtime through
/sys/module/nf_conntrack/hashsize. Changing the hash size will only
resize the hash in the current namespace however, so other namespaces
will use an invalid hash size. This can cause crashes when enlarging
the hashsize, or false negative lookups when shrinking it.

Move the hash size into the per-namespace data and only use the global
hash size to initialize the per-namespace value when instanciating a
new namespace. Additionally restrict hash resizing to init_net for
now as other namespaces are not handled currently.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/conntrack.h                 |  1 +
 include/net/netns/ipv4.h                      |  1 +
 .../netfilter/nf_conntrack_l3proto_ipv4.c     |  2 +-
 .../nf_conntrack_l3proto_ipv4_compat.c        |  4 +-
 net/ipv4/netfilter/nf_nat_core.c              | 22 ++++----
 net/netfilter/nf_conntrack_core.c             | 53 ++++++++++---------
 net/netfilter/nf_conntrack_expect.c           |  2 +-
 net/netfilter/nf_conntrack_helper.c           |  2 +-
 net/netfilter/nf_conntrack_netlink.c          |  2 +-
 net/netfilter/nf_conntrack_standalone.c       |  7 +--
 10 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index aed23b6c8478..63d449807d9b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2eb3814d6258..9a4b8b714079 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,6 +40,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_security;
 	struct xt_table		*nat_table;
 	struct hlist_head	*nat_bysource;
+	unsigned int		nat_htable_size;
 	int			nat_vmalloced;
 #endif
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d171b123a656..d1ea38a7c490 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	},
 	{
 		.procname	= "ip_conntrack_buckets",
-		.data		= &nf_conntrack_htable_size,
+		.data		= &init_net.ct.htable_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 8668a3defda6..2fb7b76da94f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe1a64479dd0..26066a2327ad 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock);
 
 static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
-/* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size __read_mostly;
-
 #define MAX_IP_NAT_PROTO 256
 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
 						__read_mostly;
@@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
 
@@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple)
 	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all,
 			    tuple->dst.protonum, 0);
-	return ((u64)hash * nf_nat_htable_size) >> 32;
+	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -147,7 +144,7 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
-	unsigned int h = hash_by_src(tuple);
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 	const struct hlist_node *n;
@@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (have_to_hash) {
 		unsigned int srchash;
 
-		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		spin_lock_bh(&nf_nat_lock);
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
@@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 
 static int __net_init nf_nat_net_init(struct net *net)
 {
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
-						      &net->ipv4.nat_vmalloced, 0);
+	/* Leave them the same for the moment. */
+	net->ipv4.nat_htable_size = net->ct.htable_size;
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
+						       &net->ipv4.nat_vmalloced, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     nf_nat_htable_size);
+			     net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
@@ -724,9 +723,6 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	/* Leave them the same for the moment. */
-	nf_nat_htable_size = nf_conntrack_htable_size;
-
 	ret = register_pernet_subsys(&nf_nat_net_ops);
 	if (ret < 0)
 		goto cleanup_extend;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9de4bd4c0dd7..4d79e3c1616c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -30,6 +30,7 @@
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <linux/mm.h>
+#include <linux/nsproxy.h>
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 	return ((u64)h * size) >> 32;
 }
 
-static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
+static inline u_int32_t hash_conntrack(const struct net *net,
+				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, nf_conntrack_htable_size,
+	return __hash_conntrack(tuple, net->ct.htable_size,
 				nf_conntrack_hash_rnd);
 }
 
@@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 }
@@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	int dropped = 0;
 
 	rcu_read_lock();
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 					 hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 		if (cnt >= NF_CT_EVICTION_RANGE)
 			break;
 
-		hash = (hash + 1) % nf_conntrack_htable_size;
+		hash = (hash + 1) % net->ct.htable_size;
 	}
 	rcu_read_unlock();
 
@@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(orig);
+		unsigned int hash = hash_conntrack(net, orig);
 		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
@@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	struct hlist_nulls_node *n;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
@@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	}
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
@@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
 	int i, bucket, vmalloced, old_vmalloced;
 	unsigned int hashsize, old_size;
-	int rnd;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 
+	if (current->nsproxy->net_ns != &init_net)
+		return -EOPNOTSUPP;
+
 	/* On boot, we can set this without any fancy locking. */
 	if (!nf_conntrack_htable_size)
 		return param_set_uint(val, kp);
@@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hash)
 		return -ENOMEM;
 
-	/* We have to rehahs for the new table anyway, so we also can
-	 * use a newrandom seed */
-	get_random_bytes(&rnd, sizeof(rnd));
-
 	/* Lookups in the old hash might happen in parallel, which means we
 	 * might get false negatives during connection lookup. New connections
 	 * created because of a false negative won't make it into the hash
 	 * though since that required taking the lock.
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < init_net.ct.htable_size; i++) {
 		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnnode);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			bucket = __hash_conntrack(&h->tuple, hashsize,
+						  nf_conntrack_hash_rnd);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = nf_conntrack_htable_size;
+	old_size = init_net.ct.htable_size;
 	old_vmalloced = init_net.ct.hash_vmalloc;
 	old_hash = init_net.ct.hash;
 
-	nf_conntrack_htable_size = hashsize;
+	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
 	init_net.ct.hash_vmalloc = vmalloced;
 	init_net.ct.hash = hash;
-	nf_conntrack_hash_rnd = rnd;
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
@@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_cache;
 	}
-	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+
+	net->ct.htable_size = nf_conntrack_htable_size;
+	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
@@ -1353,7 +1356,7 @@ err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4ad7d1d809af..2f25ff610982 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net)
 
 	if (net_eq(net, &init_net)) {
 		if (!nf_ct_expect_hsize) {
-			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+			nf_ct_expect_hsize = net->ct.htable_size / 256;
 			if (!nf_ct_expect_hsize)
 				nf_ct_expect_hsize = 1;
 		}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 65c2a7bc3afc..4b1a56bd074c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	/* Get rid of expecteds, set helpers to NULL. */
 	hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
 		unhelp(h, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 			unhelp(h, me);
 	}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 42f21c01a93e..0ffe689dfe97 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	last = (struct nf_conn *)cb->args[1];
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
+	for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) {
 restart:
 		hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
 					 hnnode) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 028aba667ef7..e310f1561bb2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
@@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname       = "nf_conntrack_buckets",
-		.data           = &nf_conntrack_htable_size,
+		.data           = &init_net.ct.htable_size,
 		.maxlen         = sizeof(unsigned int),
 		.mode           = 0444,
 		.proc_handler   = proc_dointvec,
@@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
+	table[2].data = &net->ct.htable_size;
 	table[3].data = &net->ct.sysctl_checksum;
 	table[4].data = &net->ct.sysctl_log_invalid;
 

From 9858ae38011d699d4c2fa7f3493a47accf43a0f5 Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Mon, 25 Jan 2010 16:20:52 +0530
Subject: [PATCH 332/640] [SCSI] mptfusion : mptscsih_abort return value should
 be SUCCESS instead of value 0.

retval should be SUCCESS/FAILED which is defined at scsi.h
retval = 0 is directing wrong return value. It must be retval = SUCCESS.

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/message/fusion/mptscsih.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 57752751712b..81279b3d694c 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1796,7 +1796,7 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "task abort: "
 		   "Command not in the active list! (sc=%p)\n", ioc->name,
 		   SCpnt));
-		retval = 0;
+		retval = SUCCESS;
 		goto out;
 	}
 

From 7dec9cf1dfa283feca4b761160112ea4838a6a8c Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen@vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:49:19 +0100
Subject: [PATCH 333/640] [SCSI] zfcp: Report FC BSG errors in correct field

The status FC_CTELS_STATUS_REJECT for all FC BSG errors is not
appropriate. Instead, report -EIO in the result field if there was a
problem in zfcp with the FC BSG request. If the request is good from
our point of view, report result 0, status FC_CTELS_STATUS_OK and let
userspace read the Accept or Reject from the payload (as documented in
scsi_bsg_fc.h).

Signed-off-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/s390/scsi/zfcp_fc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 0f7b493fb105..271399f62f1b 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -671,12 +671,11 @@ static void zfcp_fc_ct_els_job_handler(void *data)
 {
 	struct fc_bsg_job *job = data;
 	struct zfcp_fsf_ct_els *zfcp_ct_els = job->dd_data;
-	int status = zfcp_ct_els->status;
-	int reply_status;
+	struct fc_bsg_reply *jr = job->reply;
 
-	reply_status = status ? FC_CTELS_STATUS_REJECT : FC_CTELS_STATUS_OK;
-	job->reply->reply_data.ctels_reply.status = reply_status;
-	job->reply->reply_payload_rcv_len = job->reply_payload.payload_len;
+	jr->reply_payload_rcv_len = job->reply_payload.payload_len;
+	jr->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK;
+	jr->result = zfcp_ct_els->status ? -EIO : 0;
 	job->job_done(job);
 }
 

From 0f19bc681ed0849a2b95778460a0a8132e3700e2 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Fri, 29 Jan 2010 18:09:30 +0800
Subject: [PATCH 334/640] [SCSI] qla2xxx: make msix interrupt handler safe for
 irq

Yinghai has reported a lockdep warning on qla2xxx:

[   77.965784] WARNING: at kernel/lockdep.c:2332
trace_hardirqs_on_caller+0xc6/0x14b()
[   77.977492] Hardware name: Sun
[   77.979485] Modules linked in:
[   77.994337] Pid: 0, comm: swapper Not tainted
2.6.33-rc4-tip-yh-03949-g3a8e3f5-dirty #64
[   78.000120] Call Trace:
[   78.013298]  <IRQ>  [<ffffffff81076b54>] warn_slowpath_common+0x7c/0x94
[   78.017746]  [<ffffffff81cd712c>] ? _raw_spin_unlock_irq+0x30/0x36
[   78.035171]  [<ffffffff81076b80>] warn_slowpath_null+0x14/0x16
[   78.040152]  [<ffffffff810a2ae8>] trace_hardirqs_on_caller+0xc6/0x14b
[   78.055400]  [<ffffffff810a2b7a>] trace_hardirqs_on+0xd/0xf
[   78.058951]  [<ffffffff81cd712c>] _raw_spin_unlock_irq+0x30/0x36
[   78.074889]  [<ffffffff816461ef>] qla24xx_msix_default+0x243/0x281
[   78.091598]  [<ffffffff810a5752>] ? __lock_release+0xa5/0xae
[   78.096799]  [<ffffffff810c02ae>] handle_IRQ_event+0x53/0x113
[   78.111568]  [<ffffffff810c2061>] handle_edge_irq+0xf3/0x13b
[   78.116255]  [<ffffffff81035109>] handle_irq+0x24/0x2f
[   78.132063]  [<ffffffff81cdc4b4>] do_IRQ+0x5c/0xc3
[   78.134684]  [<ffffffff81cd7393>] ret_from_intr+0x0/0xf
[   78.137903]  <EOI>  [<ffffffff81039a56>] ? mwait_idle+0xaf/0xbb
[   78.155674]  [<ffffffff81039a4d>] ? mwait_idle+0xa6/0xbb
[   78.158600]  [<ffffffff81031c7c>] cpu_idle+0x61/0xa1
[   78.174333]  [<ffffffff81c85d7a>] rest_init+0x7e/0x80
[   78.178122]  [<ffffffff82832d1f>] start_kernel+0x316/0x31d
[   78.193623]  [<ffffffff82832297>] x86_64_start_reservations+0xa7/0xab
[   78.198924]  [<ffffffff8283237f>] x86_64_start_kernel+0xe4/0xeb
[   78.214540] ---[ end trace be4529f30a2e4ef5 ]---

This was happened when qla2xxx msix interrupt handler is trying to enable
IRQs by spin_unlock_irq(). We should make interrupt handler safe for IRQs,
use spin_lock_irqsave/spin_unlock_irqrestore, this will not break the IRQs
status in interrupt handler.

Reported-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Acked-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/qla2xxx/qla_isr.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index ffd0efdff40e..0ced91c5ebd3 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1917,6 +1917,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 	struct rsp_que *rsp;
 	struct device_reg_24xx __iomem *reg;
 	struct scsi_qla_host *vha;
+	unsigned long flags;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1927,7 +1928,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 	ha = rsp->hw;
 	reg = &ha->iobase->isp24;
 
-	spin_lock_irq(&ha->hardware_lock);
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 
 	vha = qla25xx_get_host(rsp);
 	qla24xx_process_response_queue(vha, rsp);
@@ -1935,7 +1936,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 		WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
 		RD_REG_DWORD_RELAXED(&reg->hccr);
 	}
-	spin_unlock_irq(&ha->hardware_lock);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	return IRQ_HANDLED;
 }
@@ -1946,6 +1947,7 @@ qla25xx_msix_rsp_q(int irq, void *dev_id)
 	struct qla_hw_data *ha;
 	struct rsp_que *rsp;
 	struct device_reg_24xx __iomem *reg;
+	unsigned long flags;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1958,10 +1960,10 @@ qla25xx_msix_rsp_q(int irq, void *dev_id)
 	/* Clear the interrupt, if enabled, for this response queue */
 	if (rsp->options & ~BIT_6) {
 		reg = &ha->iobase->isp24;
-		spin_lock_irq(&ha->hardware_lock);
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 		WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
 		RD_REG_DWORD_RELAXED(&reg->hccr);
-		spin_unlock_irq(&ha->hardware_lock);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	}
 	queue_work_on((int) (rsp->id - 1), ha->wq, &rsp->q_work);
 
@@ -1979,6 +1981,7 @@ qla24xx_msix_default(int irq, void *dev_id)
 	uint32_t	stat;
 	uint32_t	hccr;
 	uint16_t	mb[4];
+	unsigned long flags;
 
 	rsp = (struct rsp_que *) dev_id;
 	if (!rsp) {
@@ -1990,7 +1993,7 @@ qla24xx_msix_default(int irq, void *dev_id)
 	reg = &ha->iobase->isp24;
 	status = 0;
 
-	spin_lock_irq(&ha->hardware_lock);
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 	vha = pci_get_drvdata(ha->pdev);
 	do {
 		stat = RD_REG_DWORD(&reg->host_status);
@@ -2039,7 +2042,7 @@ qla24xx_msix_default(int irq, void *dev_id)
 		}
 		WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
 	} while (0);
-	spin_unlock_irq(&ha->hardware_lock);
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags) &&
 	    (status & MBX_INTERRUPT) && ha->flags.mbox_int) {

From 84eb8fb42c120ff32b201c1cdd910033c888f699 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 5 Jan 2010 19:41:44 +0900
Subject: [PATCH 335/640] [SCSI] compat_ioct: fix bsg SG_IO

bsg's SG_IO doesn't work on 32-bit userspace and 64-bit kernelspace.

The problem is that both sg and bsg drivers use SG_IO
ioctl. sg_ioctl_trans() does 32/64-bit conversion even against bsg
header. It messes up bsg header. bsg driver gets garbage.

This patch fixes sg_ioctl_trans to handle only sg header (struct
sg_io_hdr).

Reported-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 fs/compat_ioctl.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c5c45de1a2ee..7cbbc7ab4b50 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -301,6 +301,12 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
 	u32 data;
 	void __user *dxferp;
 	int err;
+	int interface_id;
+
+	if (get_user(interface_id, &sgio32->interface_id))
+		return -EFAULT;
+	if (interface_id != 'S')
+		return sys_ioctl(fd, cmd, (unsigned long)sgio32);
 
 	if (get_user(iovec_count, &sgio32->iovec_count))
 		return -EFAULT;

From a67093d46e3caed1a42d694a7de452b61db30562 Mon Sep 17 00:00:00 2001
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Date: Thu, 4 Feb 2010 14:17:59 -0800
Subject: [PATCH 336/640] [SCSI] qla2xxx: Obtain proper host structure during
 response-queue processing.

Original code incorrectly assumed only status-type-0
IOCBs would be queued to the response-queue, and thus all
entries would safely reference a VHA from the IOCB
'handle.'

Cc: stable@kernel.org
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/qla2xxx/qla_gbl.h |  1 -
 drivers/scsi/qla2xxx/qla_isr.c | 29 +----------------------------
 drivers/scsi/qla2xxx/qla_mid.c |  8 +++++---
 3 files changed, 6 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index f61fb8d01330..8bc6f53691e9 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -453,6 +453,5 @@ extern void qla24xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_req_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla25xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
 extern void qla24xx_wrt_rsp_reg(struct qla_hw_data *, uint16_t, uint16_t);
-extern struct scsi_qla_host * qla25xx_get_host(struct rsp_que *);
 
 #endif /* _QLA_GBL_H */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 0ced91c5ebd3..6fc63b98818c 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1930,7 +1930,7 @@ qla24xx_msix_rsp_q(int irq, void *dev_id)
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
-	vha = qla25xx_get_host(rsp);
+	vha = pci_get_drvdata(ha->pdev);
 	qla24xx_process_response_queue(vha, rsp);
 	if (!ha->flags.disable_msix_handshake) {
 		WRT_REG_DWORD(&reg->hccr, HCCRX_CLR_RISC_INT);
@@ -2280,30 +2280,3 @@ int qla25xx_request_irq(struct rsp_que *rsp)
 	msix->rsp = rsp;
 	return ret;
 }
-
-struct scsi_qla_host *
-qla25xx_get_host(struct rsp_que *rsp)
-{
-	srb_t *sp;
-	struct qla_hw_data *ha = rsp->hw;
-	struct scsi_qla_host *vha = NULL;
-	struct sts_entry_24xx *pkt;
-	struct req_que *req;
-	uint16_t que;
-	uint32_t handle;
-
-	pkt = (struct sts_entry_24xx *) rsp->ring_ptr;
-	que = MSW(pkt->handle);
-	handle = (uint32_t) LSW(pkt->handle);
-	req = ha->req_q_map[que];
-	if (handle < MAX_OUTSTANDING_COMMANDS) {
-		sp = req->outstanding_cmds[handle];
-		if (sp)
-			return  sp->fcport->vha;
-		else
-			goto base_que;
-	}
-base_que:
-	vha = pci_get_drvdata(ha->pdev);
-	return vha;
-}
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index b901aa267e7d..ff17dee28613 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -636,13 +636,15 @@ failed:
 
 static void qla_do_work(struct work_struct *work)
 {
+	unsigned long flags;
 	struct rsp_que *rsp = container_of(work, struct rsp_que, q_work);
 	struct scsi_qla_host *vha;
+	struct qla_hw_data *ha = rsp->hw;
 
-	spin_lock_irq(&rsp->hw->hardware_lock);
-	vha = qla25xx_get_host(rsp);
+	spin_lock_irqsave(&rsp->hw->hardware_lock, flags);
+	vha = pci_get_drvdata(ha->pdev);
 	qla24xx_process_response_queue(vha, rsp);
-	spin_unlock_irq(&rsp->hw->hardware_lock);
+	spin_unlock_irqrestore(&rsp->hw->hardware_lock, flags);
 }
 
 /* create response queue */

From 562ada612058133a5483c68a73605f3c5f42fffe Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 15 Jan 2010 18:54:03 -0600
Subject: [PATCH 337/640] net/9p: fix virtio transport to correctly update
 status on connect

The 9p virtio transport was not updating its connection status correctly
preventing it from being able to mount the server.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index ea1e3daabefe..67c4bc704c5a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -311,6 +311,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 	}
 
 	client->trans = (void *)chan;
+	client->status = Connected;
 	chan->client = client;
 
 	return 0;

From 349d3bb878d71978650a0634b5445af3c1cc1cd8 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 15 Jan 2010 19:01:10 -0600
Subject: [PATCH 338/640] net/9p: fail when user specifies a transport which we
 can't find

If the user specifies a transport and we can't find it, we failed back
to the default trainsport silently.  This patch will make the code
complain more loudly and return an error code.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/9p/client.c b/net/9p/client.c
index 8af95b2dddd6..90a2eb926d19 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -108,6 +108,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 			break;
 		case Opt_trans:
 			clnt->trans_mod = v9fs_get_trans_by_name(&args[0]);
+			if(clnt->trans_mod == NULL) {
+				P9_DPRINTK(P9_DEBUG_ERROR,
+				   "Could not find request transport: %s\n",
+				   (char *) &args[0]);
+				ret = -EINVAL;
+				goto free_and_return;
+			}
 			break;
 		case Opt_legacy:
 			clnt->dotu = 0;
@@ -117,6 +124,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 		}
 	}
 
+free_and_return:
 	kfree(options);
 	return ret;
 }

From 9d6939dac77102b09396ee0b89392ec7639612a7 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 15 Jan 2010 19:01:56 -0600
Subject: [PATCH 339/640] net/9p: fix statsize inside twstat

stat structures contain a size prefix.  In our twstat messages
we were including the size of the size prefix in the prefix, which is not
what the protocol wants, and Inferno servers would complain.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 90a2eb926d19..a2e2d61b903b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1222,10 +1222,11 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional)
 {
 	int ret;
 
+	/* NOTE: size shouldn't include its own length */
 	/* size[2] type[2] dev[4] qid[13] */
 	/* mode[4] atime[4] mtime[4] length[8]*/
 	/* name[s] uid[s] gid[s] muid[s] */
-	ret = 2+2+4+13+4+4+4+8+2+2+2+2;
+	ret = 2+4+13+4+4+4+8+2+2+2+2;
 
 	if (wst->name)
 		ret += strlen(wst->name);
@@ -1266,7 +1267,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
 		wst->n_uid, wst->n_gid, wst->n_muid);
 
-	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst);
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;

From 260c64d23532caf19abb77e696971da05c388489 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 8 Feb 2010 13:42:26 -0500
Subject: [PATCH 340/640] Revert "nfsd4: fix error return when pseudoroot
 missing"

Commit f39bde24b275ddc45d fixed the error return from PUTROOTFH in the
case where there is no pseudofilesystem.

This is really a case we shouldn't hit on a correctly configured server:
in the absence of a root filehandle, there's no point accepting version
4 NFS rpc calls at all.

But the shared responsibility between kernel and userspace here means
the kernel on its own can't eliminate the possiblity of this happening.
And we have indeed gotten this wrong in distro's, so new client-side
mount code that attempts to negotiate v4 by default first has to work
around this case.

Therefore when commit f39bde24b275ddc45d arrived at roughly the same
time as the new v4-default mount code, which explicitly checked only for
the previous error, the result was previously fine mounts suddenly
failing.

We'll fix both sides for now: revert the error change, and make the
client-side mount workaround more robust.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c487810a2366..a0c4016413f1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1316,19 +1316,11 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 
 static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
 {
-	struct svc_export *exp;
 	u32 fsidv[2];
 
 	mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
 
-	exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
-	/*
-	 * We shouldn't have accepting an nfsv4 request at all if we
-	 * don't have a pseudoexport!:
-	 */
-	if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
-		exp = ERR_PTR(-ESERVERFAULT);
-	return exp;
+	return rqst_exp_find(rqstp, FSID_NUM, fsidv);
 }
 
 /*

From 86a06abab0ffbb9d8ce2b7f6b6652412ce2d2c36 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 5 Feb 2010 17:55:56 -0800
Subject: [PATCH 341/640] ocfs2/dlm: Fix printing of lockname

The debug call printing the name of the lock resource was chopping
off the last character. This patch fixes the problem.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmdebug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 42b0bad7a612..0cd24cf54396 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 	assert_spin_locked(&res->spinlock);
 
 	stringify_lockname(res->lockname.name, res->lockname.len,
-			   buf, sizeof(buf) - 1);
+			   buf, sizeof(buf));
 	printk("lockres: %s, owner=%u, state=%u\n",
 	       buf, res->owner, res->state);
 	printk("  last used: %lu, refcnt: %u, on purge list: %s\n",

From 6efd806634f7526f723f3aa7ceffd3887a932d9c Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 5 Feb 2010 15:41:23 -0800
Subject: [PATCH 342/640] ocfs2/cluster: Make o2net connect messages
 KERN_NOTICE

Connect and disconnect messages are more than informational as they are required
during root cause analysis for failures. This patch changes them from KERN_INFO
to KERN_NOTICE.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Faseh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 938ba181a3d9..d8d0c65ac03c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 	}
 
 	if (was_valid && !valid) {
-		printk(KERN_INFO "o2net: no longer connected to "
+		printk(KERN_NOTICE "o2net: no longer connected to "
 		       SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
 		o2net_complete_nodes_nsw(nn);
 	}
@@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
 	if (!was_valid && valid) {
 		o2quo_conn_up(o2net_num_from_nn(nn));
 		cancel_delayed_work(&nn->nn_connect_expired);
-		printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
+		printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
 		       o2nm_this_node() > sc->sc_node->nd_num ?
 		       		"connected to" : "accepted connection from",
 		       SC_NODEF_ARGS(sc));
@@ -1476,7 +1476,7 @@ static void o2net_idle_timer(unsigned long data)
 
 	do_gettimeofday(&now);
 
-	printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+	printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
 	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
 		     o2net_idle_timeout() / 1000,
 		     o2net_idle_timeout() % 1000);

From 0da780c269957783d341fc3559e6b4c9912af7b4 Mon Sep 17 00:00:00 2001
From: Benoit Papillault <benoit.papillault@free.fr>
Date: Fri, 5 Feb 2010 01:21:03 +0100
Subject: [PATCH 343/640] mac80211: Fix probe request filtering in IBSS mode

We only reply to probe request if either the requested SSID is the
broadcast SSID or if the requested SSID matches our own SSID. This
latter case was not properly handled since we were replying to different
SSID with the same length as our own SSID.

Signed-off-by: Benoit Papillault <benoit.papillault@free.fr>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 1f2db647bb5c..22f0c2aa7a89 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -647,7 +647,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	}
 	if (pos[1] != 0 &&
 	    (pos[1] != ifibss->ssid_len ||
-	     !memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) {
+	     memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len))) {
 		/* Ignore ProbeReq for foreign SSID */
 		return;
 	}

From 098dfded5b1b09927995e89c6d689f85a0f53384 Mon Sep 17 00:00:00 2001
From: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Date: Fri, 5 Feb 2010 11:40:00 -0800
Subject: [PATCH 344/640] iwlwifi: Fix to set correct ht configuration

iwl_set_rxon_ht() only get called in iwl_post_associate(); which cause
possible incorrect ht configuration. Adding the call in iwl_mac_config() if
IEEE80211_CONF_CHANGE_CHANNEL flag is set to re-configure and send rxon
command.

Fixes
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2146

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
CC: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 5461f105bd2d..d10bea64fce3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -2744,6 +2744,7 @@ int iwl_mac_config(struct ieee80211_hw *hw, u32 changed)
 		if ((le16_to_cpu(priv->staging_rxon.channel) != ch))
 			priv->staging_rxon.flags = 0;
 
+		iwl_set_rxon_ht(priv, ht_conf);
 		iwl_set_rxon_channel(priv, conf->channel);
 
 		iwl_set_flags_for_band(priv, conf->channel->band);

From c0ce77b8323c1a0d4eeef97caf16c0ea971222a9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 3 Feb 2010 10:22:31 +0100
Subject: [PATCH 345/640] mac80211: fix deferred hardware scan requests

Reinette found the reason for the warnings that
happened occasionally when a hw-offloaded scan
finished; her description of the problem:

  mac80211 will defer the handling of scan requests if it is
  busy with management work at the time. The scan requests
  are deferred and run after the work has completed. When
  this occurs there are currently two problems.

  * The scan request for hardware scan is not fully populated
    with the band and channels to scan not initialized.

  * When the scan is queued the state is not correctly updated
    to reflect that a scan is in progress. The problem here is
    that when the driver completes the scan and calls
    ieee80211_scan_completed() a warning will be triggered
    since mac80211 was not aware that a scan was in progress.

The reason is that the queued scan work will start
the hw scan right away when the hw_scan_req struct
has already been allocated. However, in the first
pass it will not have been filled, which happens
at the same time as setting the bits. To fix this,
simply move the allocation after the pending work
test as well, so that the first iteration of the
scan work will call __ieee80211_start_scan() even
in the hardware scan case.

Bug-identified-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/scan.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f934c9620b73..bc17cf7d68db 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -439,6 +439,16 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	if (local->scan_req)
 		return -EBUSY;
 
+	if (req != local->int_scan_req &&
+	    sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !list_empty(&ifmgd->work_list)) {
+		/* actually wait for the work it's doing to finish/time out */
+		set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request);
+		local->scan_req = req;
+		local->scan_sdata = sdata;
+		return 0;
+	}
+
 	if (local->ops->hw_scan) {
 		u8 *ies;
 
@@ -463,14 +473,6 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	local->scan_req = req;
 	local->scan_sdata = sdata;
 
-	if (req != local->int_scan_req &&
-	    sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    !list_empty(&ifmgd->work_list)) {
-		/* actually wait for the work it's doing to finish/time out */
-		set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request);
-		return 0;
-	}
-
 	if (local->ops->hw_scan)
 		__set_bit(SCAN_HW_SCANNING, &local->scanning);
 	else

From 7a4439c406c21b1e900ed497cec1a79d05b38c07 Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Mon, 8 Feb 2010 15:36:48 -0600
Subject: [PATCH 346/640] 9p: Include fsync support for 9p client

Implement the fsync in the client side by marking stat field values to 'don't touch' so that server may
interpret it as a request to guarantee that the contents of the associated file are committed to stable
storage before the Rwstat message is returned.

Without this patch, calling fsync on a 9p file results in "Invalid argument" error. Please check the attached
C program.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Acked-by: Venkateswararao Jujjuri (JV) <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs_vfs.h  |  1 +
 fs/9p/vfs_file.c  | 19 +++++++++++++++++++
 fs/9p/vfs_inode.c |  2 +-
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 3a7560e35865..ed835836e0dc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
 
 ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+void v9fs_blank_wstat(struct p9_wstat *wstat);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3902bf43a088..74a0461a9ac0 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
+static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
+					int datasync)
+{
+	struct p9_fid *fid;
+	struct p9_wstat wstat;
+	int retval;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
+						dentry, datasync);
+
+	fid = filp->private_data;
+	v9fs_blank_wstat(&wstat);
+
+	retval = p9_client_wstat(fid, &wstat);
+	return retval;
+}
+
 static const struct file_operations v9fs_cached_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = do_sync_read,
@@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = {
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
 	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
 };
 
 const struct file_operations v9fs_file_operations = {
@@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = {
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
 	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9d03d1ebca6f..a407fa3388c0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended)
  *
  */
 
-static void
+void
 v9fs_blank_wstat(struct p9_wstat *wstat)
 {
 	wstat->type = ~0;

From d8c8a9e36560e9ff4c99279d64ce5dd0e1a33fa6 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 8 Feb 2010 16:23:23 -0600
Subject: [PATCH 347/640] 9p: fix option parsing

Options pointer is being moved before calling kfree() which seems
to cause problems.  This uses a separate pointer to track and free
original allocation.

Signed-off-by: Venkateswararao Jujjuri <jvrao@us.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>w
---
 fs/9p/v9fs.c        | 10 ++++++----
 net/9p/client.c     |  9 +++++----
 net/9p/trans_fd.c   | 10 ++++++----
 net/9p/trans_rdma.c |  9 +++++----
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index cf62b05e296a..6848788a13db 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static const match_table_t tokens = {
 
 static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 {
-	char *options;
+	char *options, *tmp_options;
 	substring_t args[MAX_OPT_ARGS];
 	char *p;
 	int option = 0;
@@ -102,9 +102,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 	if (!opts)
 		return 0;
 
-	options = kstrdup(opts, GFP_KERNEL);
-	if (!options)
+	tmp_options = kstrdup(opts, GFP_KERNEL);
+	if (!tmp_options)
 		goto fail_option_alloc;
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -194,7 +195,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			continue;
 		}
 	}
-	kfree(options);
+
+	kfree(tmp_options);
 	return ret;
 
 fail_option_alloc:
diff --git a/net/9p/client.c b/net/9p/client.c
index a2e2d61b903b..cbe066966b3c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -69,7 +69,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
 
 static int parse_opts(char *opts, struct p9_client *clnt)
 {
-	char *options;
+	char *options, *tmp_options;
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
@@ -81,12 +81,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	if (!opts)
 		return 0;
 
-	options = kstrdup(opts, GFP_KERNEL);
-	if (!options) {
+	tmp_options = kstrdup(opts, GFP_KERNEL);
+	if (!tmp_options) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
 				"failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -125,7 +126,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	}
 
 free_and_return:
-	kfree(options);
+	kfree(tmp_options);
 	return ret;
 }
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index be1cb909d8c0..31d0b05582a9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -714,7 +714,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
-	char *options;
+	char *options, *tmp_options;
 	int ret;
 
 	opts->port = P9_PORT;
@@ -724,12 +724,13 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 	if (!params)
 		return 0;
 
-	options = kstrdup(params, GFP_KERNEL);
-	if (!options) {
+	tmp_options = kstrdup(params, GFP_KERNEL);
+	if (!tmp_options) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
 				"failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -760,7 +761,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
 			continue;
 		}
 	}
-	kfree(options);
+
+	kfree(tmp_options);
 	return 0;
 }
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 65cb29db03f8..2c95a89c0f46 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -166,7 +166,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
-	char *options;
+	char *options, *tmp_options;
 	int ret;
 
 	opts->port = P9_PORT;
@@ -177,12 +177,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	if (!params)
 		return 0;
 
-	options = kstrdup(params, GFP_KERNEL);
-	if (!options) {
+	tmp_options = kstrdup(params, GFP_KERNEL);
+	if (!tmp_options) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
 			   "failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -216,7 +217,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	}
 	/* RQ must be at least as large as the SQ */
 	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
-	kfree(options);
+	kfree(tmp_options);
 	return 0;
 }
 

From 4b9d2a2112163a757943c78ea98587fc9e828641 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 8 Feb 2010 13:16:55 +1000
Subject: [PATCH 348/640] drm/radeon/kms: don't crash if no DDC bus on VGA/DVI
 connector.

This is strange - like really really strange, twilight zone of strange.
VGA ports have DDC buses, but sometimes for some reasons the BIOS
says we don't and we oops - AMD mentioned bios bugs so we'll have
to add quirks.

reported on irc by nirbheek and
https://bugzilla.redhat.com/show_bug.cgi?id=554323

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 20 ++++++++++++--------
 drivers/gpu/drm/radeon/radeon_display.c    | 11 ++++++++++-
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 2d8e5a70f284..238188540017 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -580,16 +580,18 @@ static enum drm_connector_status radeon_vga_detect(struct drm_connector *connect
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder;
 	struct drm_encoder_helper_funcs *encoder_funcs;
-	bool dret;
+	bool dret = false;
 	enum drm_connector_status ret = connector_status_disconnected;
 
 	encoder = radeon_best_single_encoder(connector);
 	if (!encoder)
 		ret = connector_status_disconnected;
 
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
-	dret = radeon_ddc_probe(radeon_connector);
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
+	if (radeon_connector->ddc_bus) {
+		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
+		dret = radeon_ddc_probe(radeon_connector);
+		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
+	}
 	if (dret) {
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
@@ -740,11 +742,13 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
 	struct drm_mode_object *obj;
 	int i;
 	enum drm_connector_status ret = connector_status_disconnected;
-	bool dret;
+	bool dret = false;
 
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
-	dret = radeon_ddc_probe(radeon_connector);
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
+	if (radeon_connector->ddc_bus) {
+		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
+		dret = radeon_ddc_probe(radeon_connector);
+		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
+	}
 	if (dret) {
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 6a92f994cc26..7e17a362b54b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -278,7 +278,7 @@ static void radeon_print_display_setup(struct drm_device *dev)
 		DRM_INFO("  %s\n", connector_names[connector->connector_type]);
 		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
 			DRM_INFO("  %s\n", hpd_names[radeon_connector->hpd.hpd]);
-		if (radeon_connector->ddc_bus)
+		if (radeon_connector->ddc_bus) {
 			DRM_INFO("  DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
 				 radeon_connector->ddc_bus->rec.mask_clk_reg,
 				 radeon_connector->ddc_bus->rec.mask_data_reg,
@@ -288,6 +288,15 @@ static void radeon_print_display_setup(struct drm_device *dev)
 				 radeon_connector->ddc_bus->rec.en_data_reg,
 				 radeon_connector->ddc_bus->rec.y_clk_reg,
 				 radeon_connector->ddc_bus->rec.y_data_reg);
+		} else {
+			if (connector->connector_type == DRM_MODE_CONNECTOR_VGA ||
+			    connector->connector_type == DRM_MODE_CONNECTOR_DVII ||
+			    connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
+			    connector->connector_type == DRM_MODE_CONNECTOR_DVIA ||
+			    connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+			    connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+				DRM_INFO("  DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n");
+		}
 		DRM_INFO("  Encoders:\n");
 		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 			radeon_encoder = to_radeon_encoder(encoder);

From 2fc1b5dd99f66d93ffc23fd8df82d384c1a354c8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Feb 2010 15:00:39 -0800
Subject: [PATCH 349/640] dst: call cond_resched() in dst_gc_task()

Kernel bugzilla #15239

On some workloads, it is quite possible to get a huge dst list to
process in dst_gc_task(), and trigger soft lockup detection.

Fix is to call cond_resched(), as we run in process context.

Reported-by: Pawel Staszewski <pstaszewski@itcare.pl>
Tested-by: Pawel Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dst.c b/net/core/dst.c
index 57bc4d5b8d08..cb1b3488b739 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -17,6 +17,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <net/net_namespace.h>
+#include <linux/sched.h>
 
 #include <net/dst.h>
 
@@ -79,6 +80,7 @@ loop:
 	while ((dst = next) != NULL) {
 		next = dst->next;
 		prefetch(&next->next);
+		cond_resched();
 		if (likely(atomic_read(&dst->__refcnt))) {
 			last->next = dst;
 			last = dst;

From efa8450f6c93c9d4c99adfea2f52f1d02d878d5b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 9 Feb 2010 09:06:00 +1000
Subject: [PATCH 350/640] drm/radeon/kms: add quirk for VGA without DDC on
 rv730 XFX card.

Reported on irc by nirbheek.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index fa82ca74324e..2dcda6115874 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -287,6 +287,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 			*connector_type = DRM_MODE_CONNECTOR_DVID;
 	}
 
+	/* XFX Pine Group device rv730 reports no VGA DDC lines
+	 * even though they are wired up to record 0x93
+	 */
+	if ((dev->pdev->device == 0x9498) &&
+	    (dev->pdev->subsystem_vendor == 0x1682) &&
+	    (dev->pdev->subsystem_device == 0x2452)) {
+		struct radeon_device *rdev = dev->dev_private;
+		*i2c_bus = radeon_lookup_i2c_gpio(rdev, 0x93);
+	}
 	return true;
 }
 

From fb786100f7c75e154e63d0f5a2982e6d46dfb602 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 8 Feb 2010 11:50:32 +0000
Subject: [PATCH 351/640] 9p: Fix the kernel crash on a failed mount

The patch fix the crash repoted below

[   15.149907] BUG: unable to handle kernel NULL pointer dereference at 00000001
[   15.150806] IP: [<c140b886>] p9_virtio_close+0x18/0x24
.....
....
[   15.150806] Call Trace:
[   15.150806]  [<c1408e78>] ? p9_client_destroy+0x3f/0x163
[   15.150806]  [<c1409342>] ? p9_client_create+0x25f/0x270
[   15.150806]  [<c1063b72>] ? trace_hardirqs_on+0xb/0xd
[   15.150806]  [<c11ed4e8>] ? match_token+0x64/0x164
[   15.150806]  [<c1175e8d>] ? v9fs_session_init+0x2f1/0x3c8
[   15.150806]  [<c109cfc9>] ? kmem_cache_alloc+0x98/0xb8
[   15.150806]  [<c1063b72>] ? trace_hardirqs_on+0xb/0xd
[   15.150806]  [<c1173dd1>] ? v9fs_get_sb+0x47/0x1e8
[   15.150806]  [<c1173dea>] ? v9fs_get_sb+0x60/0x1e8
[   15.150806]  [<c10a2e77>] ? vfs_kern_mount+0x81/0x11a
[   15.150806]  [<c10a2f55>] ? do_kern_mount+0x33/0xbe
[   15.150806]  [<c10b40b9>] ? do_mount+0x654/0x6b3
[   15.150806]  [<c1038949>] ? do_page_fault+0x0/0x284
[   15.150806]  [<c10b28ec>] ? copy_mount_options+0x73/0xd2
[   15.150806]  [<c10b4179>] ? sys_mount+0x61/0x94
[   15.150806]  [<c14284e9>] ? syscall_call+0x7/0xb
....
[   15.203562] ---[ end trace 1dd159357709eb4b ]---
[

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 67c4bc704c5a..cb50f4ae5eef 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -102,7 +102,8 @@ static void p9_virtio_close(struct p9_client *client)
 	struct virtio_chan *chan = client->trans;
 
 	mutex_lock(&virtio_9p_lock);
-	chan->inuse = false;
+	if (chan)
+		chan->inuse = false;
 	mutex_unlock(&virtio_9p_lock);
 }
 

From bf2d29c64dd777e9a40bc4533e721944a590250f Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 8 Feb 2010 17:59:34 -0600
Subject: [PATCH 352/640] 9p: fix memory leak in v9fs_parse_options()

If match_strdup() fail this function exits without freeing the options string.

Signed-off-by: Venkateswararao Jujjuri <jvrao@us.ibm.com>
Sigend-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6848788a13db..7d6c2139891d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -103,8 +103,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 		return 0;
 
 	tmp_options = kstrdup(opts, GFP_KERNEL);
-	if (!tmp_options)
+	if (!tmp_options) {
+		ret = -ENOMEM;
 		goto fail_option_alloc;
+	}
 	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
@@ -160,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			break;
 		case Opt_cache:
 			s = match_strdup(&args[0]);
-			if (!s)
-				goto fail_option_alloc;
+			if (!s) {
+				ret = -ENOMEM;
+				P9_DPRINTK(P9_DEBUG_ERROR,
+				  "problem allocating copy of cache arg\n");
+				goto free_and_return;
+			}
 
 			if (strcmp(s, "loose") == 0)
 				v9ses->cache = CACHE_LOOSE;
@@ -174,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 
 		case Opt_access:
 			s = match_strdup(&args[0]);
-			if (!s)
-				goto fail_option_alloc;
+			if (!s) {
+				ret = -ENOMEM;
+				P9_DPRINTK(P9_DEBUG_ERROR,
+				  "problem allocating copy of access arg\n");
+				goto free_and_return;
+			}
 
 			v9ses->flags &= ~V9FS_ACCESS_MASK;
 			if (strcmp(s, "user") == 0)
@@ -196,13 +206,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 		}
 	}
 
+free_and_return:
 	kfree(tmp_options);
-	return ret;
-
 fail_option_alloc:
-	P9_DPRINTK(P9_DEBUG_ERROR,
-		   "failed to allocate copy of option argument\n");
-	return -ENOMEM;
+	return ret;
 }
 
 /**

From 8781ff9495578dbb74065fae55305110d9f81cb9 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Mon, 8 Feb 2010 18:18:34 -0600
Subject: [PATCH 353/640] 9p: fix p9_client_destroy unconditional calling
 v9fs_put_trans

restructure client create code to handle error cases better and
only cleanup initialized portions of the stack.

Signed-off-by: Venkateswararao Jujjuri <jvrao@us.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index cbe066966b3c..09d4f1e2e4a8 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -676,18 +676,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	clnt->trans = NULL;
 	spin_lock_init(&clnt->lock);
 	INIT_LIST_HEAD(&clnt->fidlist);
-	clnt->fidpool = p9_idpool_create();
-	if (IS_ERR(clnt->fidpool)) {
-		err = PTR_ERR(clnt->fidpool);
-		clnt->fidpool = NULL;
-		goto error;
-	}
 
 	p9_tag_init(clnt);
 
 	err = parse_opts(options, clnt);
 	if (err < 0)
-		goto error;
+		goto free_client;
 
 	if (!clnt->trans_mod)
 		clnt->trans_mod = v9fs_get_default_trans();
@@ -696,7 +690,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		err = -EPROTONOSUPPORT;
 		P9_DPRINTK(P9_DEBUG_ERROR,
 				"No transport defined or default transport\n");
-		goto error;
+		goto free_client;
+	}
+
+	clnt->fidpool = p9_idpool_create();
+	if (IS_ERR(clnt->fidpool)) {
+		err = PTR_ERR(clnt->fidpool);
+		clnt->fidpool = NULL;
+		goto put_trans;
 	}
 
 	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
@@ -704,19 +705,25 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 	err = clnt->trans_mod->create(clnt, dev_name, options);
 	if (err)
-		goto error;
+		goto destroy_fidpool;
 
 	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ;
 
 	err = p9_client_version(clnt);
 	if (err)
-		goto error;
+		goto close_trans;
 
 	return clnt;
 
-error:
-	p9_client_destroy(clnt);
+close_trans:
+	clnt->trans_mod->close(clnt);
+destroy_fidpool:
+	p9_idpool_destroy(clnt->fidpool);
+put_trans:
+	v9fs_put_trans(clnt->trans_mod);
+free_client:
+	kfree(clnt);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_create);

From f927b8907cb25943d6275d4ea036c065b8fd3f33 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 27 Jan 2010 14:29:05 +1000
Subject: [PATCH 354/640] drm/nouveau: fix non-vram notifier blocks

Due to a thinko, these were previously forced to VRAM even if we allocated
them in GART.

This commit fixes that bug, but keeps the previous behaviour of using VRAM
by default until it's been tested properly across more chipsets.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.c      |  2 +-
 drivers/gpu/drm/nouveau/nouveau_notifier.c | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 343ab7f17ccc..fc692e5553ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -56,7 +56,7 @@ int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
 MODULE_PARM_DESC(vram_notify, "Force DMA notifiers to be in VRAM");
-int nouveau_vram_notify;
+int nouveau_vram_notify = 1;
 module_param_named(vram_notify, nouveau_vram_notify, int, 0400);
 
 MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (>=GeForce 8)");
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index 6c66a34b6345..d99dc087f9b1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -34,15 +34,20 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct nouveau_bo *ntfy = NULL;
+	uint32_t flags;
 	int ret;
 
-	ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, nouveau_vram_notify ?
-			      TTM_PL_FLAG_VRAM : TTM_PL_FLAG_TT,
+	if (nouveau_vram_notify)
+		flags = TTM_PL_FLAG_VRAM;
+	else
+		flags = TTM_PL_FLAG_TT;
+
+	ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags,
 			      0, 0x0000, false, true, &ntfy);
 	if (ret)
 		return ret;
 
-	ret = nouveau_bo_pin(ntfy, TTM_PL_FLAG_VRAM);
+	ret = nouveau_bo_pin(ntfy, flags);
 	if (ret)
 		goto out_err;
 
@@ -128,6 +133,8 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
 			target = NV_DMA_TARGET_PCI;
 		} else {
 			target = NV_DMA_TARGET_AGP;
+			if (dev_priv->card_type >= NV_50)
+				offset += dev_priv->vm_gart_base;
 		}
 	} else {
 		NV_ERROR(dev, "Bad DMA target, mem_type %d!\n",

From a32ed69d7bb3cd259d813d71281d62993b9a70fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= <koriakin@0x04.net>
Date: Tue, 26 Jan 2010 14:00:42 +0000
Subject: [PATCH 355/640] drm/nouveau: Add module options to disable
 acceleration.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

noaccel=1 disables all acceleration and doesn't even attempt
initialising PGRAPH+PFIFO, nofbaccel=1 only makes fbcon unaccelerated.

Signed-off-by: Marcin Kościelnicki <koriakin@0x04.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.c   |  8 +++++++
 drivers/gpu/drm/nouveau/nouveau_drv.h   |  2 ++
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 10 +++++---
 drivers/gpu/drm/nouveau/nouveau_state.c | 32 +++++++++++++++----------
 4 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index fc692e5553ad..da3b93b84502 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -75,6 +75,14 @@ MODULE_PARM_DESC(ignorelid, "Ignore ACPI lid status");
 int nouveau_ignorelid = 0;
 module_param_named(ignorelid, nouveau_ignorelid, int, 0400);
 
+MODULE_PARM_DESC(noagp, "Disable all acceleration");
+int nouveau_noaccel = 0;
+module_param_named(noaccel, nouveau_noaccel, int, 0400);
+
+MODULE_PARM_DESC(noagp, "Disable fbcon acceleration");
+int nouveau_nofbaccel = 0;
+module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
+
 MODULE_PARM_DESC(tv_norm, "Default TV norm.\n"
 		 "\t\tSupported: PAL, PAL-M, PAL-N, PAL-Nc, NTSC-M, NTSC-J,\n"
 		 "\t\t\thd480i, hd480p, hd576i, hd576p, hd720p, hd1080i.\n"
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 6b9690418bc7..5445cefdd03e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -678,6 +678,8 @@ extern int nouveau_reg_debug;
 extern char *nouveau_vbios;
 extern int nouveau_ctxfw;
 extern int nouveau_ignorelid;
+extern int nouveau_nofbaccel;
+extern int nouveau_noaccel;
 
 /* nouveau_state.c */
 extern void nouveau_preclose(struct drm_device *dev, struct drm_file *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 0b05c869e0e7..eddadaccc285 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -267,8 +267,12 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width,
 	dev_priv->fbdev_info = info;
 
 	strcpy(info->fix.id, "nouveaufb");
-	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA |
-		      FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_IMAGEBLIT;
+	if (nouveau_nofbaccel)
+		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_DISABLED;
+	else
+		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA |
+			      FBINFO_HWACCEL_FILLRECT |
+			      FBINFO_HWACCEL_IMAGEBLIT;
 	info->fbops = &nouveau_fbcon_ops;
 	info->fix.smem_start = dev->mode_config.fb_base + nvbo->bo.offset -
 			       dev_priv->vm_vram_base;
@@ -316,7 +320,7 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width,
 	par->nouveau_fb = nouveau_fb;
 	par->dev = dev;
 
-	if (dev_priv->channel) {
+	if (dev_priv->channel && !nouveau_nofbaccel) {
 		switch (dev_priv->card_type) {
 		case NV_50:
 			nv50_fbcon_accel_init(info);
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index f2d0187ba152..241e24d60eb4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -427,15 +427,19 @@ nouveau_card_init(struct drm_device *dev)
 	if (ret)
 		goto out_timer;
 
-	/* PGRAPH */
-	ret = engine->graph.init(dev);
-	if (ret)
-		goto out_fb;
+	if (nouveau_noaccel)
+		engine->graph.accel_blocked = true;
+	else {
+		/* PGRAPH */
+		ret = engine->graph.init(dev);
+		if (ret)
+			goto out_fb;
 
-	/* PFIFO */
-	ret = engine->fifo.init(dev);
-	if (ret)
-		goto out_graph;
+		/* PFIFO */
+		ret = engine->fifo.init(dev);
+		if (ret)
+			goto out_graph;
+	}
 
 	/* this call irq_preinstall, register irq handler and
 	 * call irq_postinstall
@@ -479,9 +483,11 @@ nouveau_card_init(struct drm_device *dev)
 out_irq:
 	drm_irq_uninstall(dev);
 out_fifo:
-	engine->fifo.takedown(dev);
+	if (!nouveau_noaccel)
+		engine->fifo.takedown(dev);
 out_graph:
-	engine->graph.takedown(dev);
+	if (!nouveau_noaccel)
+		engine->graph.takedown(dev);
 out_fb:
 	engine->fb.takedown(dev);
 out_timer:
@@ -518,8 +524,10 @@ static void nouveau_card_takedown(struct drm_device *dev)
 			dev_priv->channel = NULL;
 		}
 
-		engine->fifo.takedown(dev);
-		engine->graph.takedown(dev);
+		if (!nouveau_noaccel) {
+			engine->fifo.takedown(dev);
+			engine->graph.takedown(dev);
+		}
 		engine->fb.takedown(dev);
 		engine->timer.takedown(dev);
 		engine->mc.takedown(dev);

From 69c9700b544e496dc3ccf472a4f3a76dcf4abaf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= <koriakin@0x04.net>
Date: Tue, 26 Jan 2010 18:39:20 +0000
Subject: [PATCH 356/640] drm/nouveau: Add getparam to get available PGRAPH
 units.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On nv50, this will be needed by applications using CUDA to know
how much stack/local memory to allocate.

Signed-off-by: Marcin Kościelnicki <koriakin@0x04.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_reg.h   | 1 +
 drivers/gpu/drm/nouveau/nouveau_state.c | 9 +++++++++
 include/drm/nouveau_drm.h               | 1 +
 3 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index 251f1b3b38b9..aa9b310e41be 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -99,6 +99,7 @@
  * the card will hang early on in the X init process.
  */
 #    define NV_PMC_ENABLE_UNK13                               (1<<13)
+#define NV40_PMC_GRAPH_UNITS				   0x00001540
 #define NV40_PMC_BACKLIGHT				   0x000015f0
 #	define NV40_PMC_BACKLIGHT_MASK			   0x001f0000
 #define NV40_PMC_1700                                      0x00001700
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 241e24d60eb4..fcd7610817a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -825,6 +825,15 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 	case NOUVEAU_GETPARAM_VM_VRAM_BASE:
 		getparam->value = dev_priv->vm_vram_base;
 		break;
+	case NOUVEAU_GETPARAM_GRAPH_UNITS:
+		/* NV40 and NV50 versions are quite different, but register
+		 * address is the same. User is supposed to know the card
+		 * family anyway... */
+		if (dev_priv->chipset >= 0x40) {
+			getparam->value = nv_rd32(dev, NV40_PMC_GRAPH_UNITS);
+			break;
+		}
+		/* FALLTHRU */
 	default:
 		NV_ERROR(dev, "unknown parameter %lld\n", getparam->param);
 		return -EINVAL;
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 1e67c441ea82..f745948b61e4 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -77,6 +77,7 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_GETPARAM_PCI_PHYSICAL    10
 #define NOUVEAU_GETPARAM_CHIPSET_ID      11
 #define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
+#define NOUVEAU_GETPARAM_GRAPH_UNITS     13
 struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;

From 139295b671ff4ccd904f2fa58e9dbc0fe99cc7fe Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 30 Jan 2010 18:28:00 +0100
Subject: [PATCH 357/640] drm/nouveau: Fixup semaphores on pre-nv50 cards.

Apparently, they generate a PFIFO interrupt each time one of the
semaphore methods is executed if its ctxdma wasn't manually marked as
valid. This patch makes it flip the valid bit in response to the
DMA_SEMAPHORE method (which triggers the IRQ even for a valid ctxdma).

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_irq.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 3b9bad66162a..baa9b3e0b66b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -211,6 +211,20 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
 								get + 4);
 		}
 
+		if (status & NV_PFIFO_INTR_SEMAPHORE) {
+			uint32_t sem;
+
+			status &= ~NV_PFIFO_INTR_SEMAPHORE;
+			nv_wr32(dev, NV03_PFIFO_INTR_0,
+				NV_PFIFO_INTR_SEMAPHORE);
+
+			sem = nv_rd32(dev, NV10_PFIFO_CACHE1_SEMAPHORE);
+			nv_wr32(dev, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1);
+
+			nv_wr32(dev, NV03_PFIFO_CACHE1_GET, get + 4);
+			nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+		}
+
 		if (status) {
 			NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n",
 				status, chid);

From f0fbe3eb5f65fe5948219f4ceac68f8a665b1fc6 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 30 Jan 2010 23:21:38 +0100
Subject: [PATCH 358/640] drm/nouveau: call ttm_bo_wait with the bo lock held
 to prevent hang

nouveau_gem_ioctl_cpu_prep calls ttm_bo_wait without the bo lock held.
ttm_bo_wait unlocks that lock, and so must be called with it held.

Currently this bug causes libdrm nouveau_bo_busy() to hang the machine.

Signed-off-by: Luca Barbieri <luca at luca-barbieri.com>
Acked-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 6ac804b0c9f9..70cc30803e3b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -925,7 +925,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	}
 
 	if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
+		spin_lock(&nvbo->bo.lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
+		spin_unlock(&nvbo->bo.lock);
 	} else {
 		ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
 		if (ret == 0)

From 7dad9ef6d9255b4d2d0a26305a785a55f3ba55e3 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 2 Feb 2010 14:40:30 -0800
Subject: [PATCH 359/640] drivers/gpu/drm/nouveau/nouveau_grctx.c: correct NULL
 test

Test the just-allocated value for NULL rather than some other value.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression x,y;
statement S;
@@

x = \(kmalloc\|kcalloc\|kzalloc\)(...);
(
if ((x) == NULL) S
|
if (
-   y
+   x
       == NULL)
 S
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: David Airlie <airlied@linux.ie>
Cc: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_grctx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.c b/drivers/gpu/drm/nouveau/nouveau_grctx.c
index 419f4c2b3b89..c7ebec696747 100644
--- a/drivers/gpu/drm/nouveau/nouveau_grctx.c
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.c
@@ -97,8 +97,8 @@ nouveau_grctx_prog_load(struct drm_device *dev)
 		}
 
 		pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL);
-		if (!pgraph->ctxprog) {
-			NV_ERROR(dev, "OOM copying ctxprog\n");
+		if (!pgraph->ctxvals) {
+			NV_ERROR(dev, "OOM copying ctxvals\n");
 			release_firmware(fw);
 			nouveau_grctx_fini(dev);
 			return -ENOMEM;

From 126b5440565a1fa0cb49fd30041525d5a9a848f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= <koriakin@0x04.net>
Date: Wed, 27 Jan 2010 14:03:18 +0000
Subject: [PATCH 360/640] drm/nouveau: Fix fbcon on mixed pre-NV50 + NV50
 multicard.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We used single shared fbops struct and patched it at fb init time with
pointers to the right variant. On mixed multicard, this meant that
it was either sending NV50-style commands to all cards, or NV04-style
commands to all cards.

Signed-off-by: Marcin Kościelnicki <koriakin@0x04.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 30 +++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_fbcon.h |  6 +++++
 drivers/gpu/drm/nouveau/nv04_fbcon.c    |  9 +++-----
 drivers/gpu/drm/nouveau/nv50_fbcon.c    |  9 +++-----
 4 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index eddadaccc285..ea879a2efef3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -107,6 +107,34 @@ static struct fb_ops nouveau_fbcon_ops = {
 	.fb_setcmap = drm_fb_helper_setcmap,
 };
 
+static struct fb_ops nv04_fbcon_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par,
+	.fb_setcolreg = drm_fb_helper_setcolreg,
+	.fb_fillrect = nv04_fbcon_fillrect,
+	.fb_copyarea = nv04_fbcon_copyarea,
+	.fb_imageblit = nv04_fbcon_imageblit,
+	.fb_sync = nouveau_fbcon_sync,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_setcmap = drm_fb_helper_setcmap,
+};
+
+static struct fb_ops nv50_fbcon_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par,
+	.fb_setcolreg = drm_fb_helper_setcolreg,
+	.fb_fillrect = nv50_fbcon_fillrect,
+	.fb_copyarea = nv50_fbcon_copyarea,
+	.fb_imageblit = nv50_fbcon_imageblit,
+	.fb_sync = nouveau_fbcon_sync,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_setcmap = drm_fb_helper_setcmap,
+};
+
 static void nouveau_fbcon_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
 				    u16 blue, int regno)
 {
@@ -324,9 +352,11 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width,
 		switch (dev_priv->card_type) {
 		case NV_50:
 			nv50_fbcon_accel_init(info);
+			info->fbops = &nv50_fbcon_ops;
 			break;
 		default:
 			nv04_fbcon_accel_init(info);
+			info->fbops = &nv04_fbcon_ops;
 			break;
 		};
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 462e0b87b4bd..f9c34e1a8c11 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -40,7 +40,13 @@ int nouveau_fbcon_remove(struct drm_device *dev, struct drm_framebuffer *fb);
 void nouveau_fbcon_restore(void);
 void nouveau_fbcon_zfill(struct drm_device *dev);
 
+void nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region);
+void nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
+void nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image);
 int nv04_fbcon_accel_init(struct fb_info *info);
+void nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
+void nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region);
+void nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image);
 int nv50_fbcon_accel_init(struct fb_info *info);
 
 void nouveau_fbcon_gpu_lockup(struct fb_info *info);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index d910873c1368..fd01caabd5c3 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -27,7 +27,7 @@
 #include "nouveau_dma.h"
 #include "nouveau_fbcon.h"
 
-static void
+void
 nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -54,7 +54,7 @@ nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 	FIRE_RING(chan);
 }
 
-static void
+void
 nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -88,7 +88,7 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	FIRE_RING(chan);
 }
 
-static void
+void
 nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -307,9 +307,6 @@ nv04_fbcon_accel_init(struct fb_info *info)
 
 	FIRE_RING(chan);
 
-	info->fbops->fb_fillrect = nv04_fbcon_fillrect;
-	info->fbops->fb_copyarea = nv04_fbcon_copyarea;
-	info->fbops->fb_imageblit = nv04_fbcon_imageblit;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index e4f279ee61cf..0f57cdf7ccb2 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -3,7 +3,7 @@
 #include "nouveau_dma.h"
 #include "nouveau_fbcon.h"
 
-static void
+void
 nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -46,7 +46,7 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	FIRE_RING(chan);
 }
 
-static void
+void
 nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -81,7 +81,7 @@ nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 	FIRE_RING(chan);
 }
 
-static void
+void
 nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbcon_par *par = info->par;
@@ -262,9 +262,6 @@ nv50_fbcon_accel_init(struct fb_info *info)
 	OUT_RING(chan, info->fix.smem_start - dev_priv->fb_phys +
 			 dev_priv->vm_vram_base);
 
-	info->fbops->fb_fillrect = nv50_fbcon_fillrect;
-	info->fbops->fb_copyarea = nv50_fbcon_copyarea;
-	info->fbops->fb_imageblit = nv50_fbcon_imageblit;
 	return 0;
 }
 

From 9967b9481d2387af4dbe5ceda7a209466ba004a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= <koriakin@0x04.net>
Date: Mon, 8 Feb 2010 00:20:17 +0000
Subject: [PATCH 361/640] drm/nouveau: Add proper vgaarb support.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Marcin Kościelnicki <koriakin@0x04.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index fcd7610817a1..a4851af5b05e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -310,6 +310,14 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 static unsigned int
 nouveau_vga_set_decode(void *priv, bool state)
 {
+	struct drm_device *dev = priv;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->chipset >= 0x40)
+		nv_wr32(dev, 0x88054, state);
+	else
+		nv_wr32(dev, 0x1854, state);
+
 	if (state)
 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

From e235c1f3e132a243a1f81b3d95c99ee199b4d3f3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 22 Jan 2010 13:17:28 +1000
Subject: [PATCH 362/640] drm/nv40: make INIT_COMPUTE_MEM a NOP, just like nv50

It appears we aren't required to do memory sizing ourselves on nv40
either.  NV40 init tables read a strap from PEXTDEV_BOOT_0 into a
CRTC register, and then later use that value to select a memory
configuration (written to PFB_CFG0, just like INIT_COMPUTE_MEM on
earlier cards) with INIT_IO_RESTRICT_PROG.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bios.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index d7f8d8b4a4b8..fb4793e65ff2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -1865,7 +1865,7 @@ init_compute_mem(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 
 	struct drm_nouveau_private *dev_priv = bios->dev->dev_private;
 
-	if (dev_priv->card_type >= NV_50)
+	if (dev_priv->card_type >= NV_40)
 		return 1;
 
 	/*

From 1ee7698fc3ec3d7949fa55e5154c8f5de8f1c3d9 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Feb 2010 10:08:34 +1000
Subject: [PATCH 363/640] drm/nouveau: make dp auxch xfer len check for reads
 only

Writes don't return a count, and adding the check broke native DP.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_dp.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index dd4937224220..f954ad93e81f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -502,12 +502,12 @@ nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 			break;
 	}
 
-	if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) {
-		ret = -EREMOTEIO;
-		goto out;
-	}
-
 	if (cmd & 1) {
+		if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) {
+			ret = -EREMOTEIO;
+			goto out;
+		}
+
 		for (i = 0; i < 4; i++) {
 			data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i));
 			NV_DEBUG_KMS(dev, "rd %d: 0x%08x\n", i, data32[i]);

From 9eb07c259207d048e3ee8be2a77b2a4680b1edd4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 9 Feb 2010 12:31:47 +1100
Subject: [PATCH 364/640] md: fix 'degraded' calculation when starting a
 reshape.

This code was written long ago when it was not possible to
reshape a degraded array.  Now it is so the current level of
degraded-ness needs to be taken in to account.  Also newly addded
devices should only reduce degradedness if they are deemed to be
in-sync.

In particular, if you convert a RAID5 to a RAID6, and increase the
number of devices at the same time, then the 5->6 conversion will
make the array degraded so the current code will produce a wrong
value for 'degraded' - "-1" to be precise.

If the reshape runs to completion end_reshape will calculate a correct
new value for 'degraded', but if a device fails during the reshape an
incorrect decision might be made based on the incorrect value of
"degraded".

This patch is suitable for 2.6.32-stable and if they are still open,
2.6.31-stable and 2.6.30-stable as well.

Cc: stable@kernel.org
Reported-by: Michael Evans <mjevans1983@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid5.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e84204eb12df..b5629c3e14fa 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5464,11 +5464,11 @@ static int raid5_start_reshape(mddev_t *mddev)
 		    !test_bit(Faulty, &rdev->flags)) {
 			if (raid5_add_disk(mddev, rdev) == 0) {
 				char nm[20];
-				if (rdev->raid_disk >= conf->previous_raid_disks)
+				if (rdev->raid_disk >= conf->previous_raid_disks) {
 					set_bit(In_sync, &rdev->flags);
-				else
+					added_devices++;
+				} else
 					rdev->recovery_offset = 0;
-				added_devices++;
 				sprintf(nm, "rd%d", rdev->raid_disk);
 				if (sysfs_create_link(&mddev->kobj,
 						      &rdev->kobj, nm))
@@ -5480,9 +5480,12 @@ static int raid5_start_reshape(mddev_t *mddev)
 				break;
 		}
 
+	/* When a reshape changes the number of devices, ->degraded
+	 * is measured against the large of the pre and post number of
+	 * devices.*/
 	if (mddev->delta_disks > 0) {
 		spin_lock_irqsave(&conf->device_lock, flags);
-		mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+		mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
 			- added_devices;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}

From 2d171886b183982feb03446997c65e9e212b948e Mon Sep 17 00:00:00 2001
From: Divy Le Ray <divy@chelsio.com>
Date: Mon, 8 Feb 2010 22:37:24 -0800
Subject: [PATCH 365/640] cxgb3: fix GRO checksum check

Verify the HW checksum state for frames handed to GRO processing.

Signed-off-by: Divy Le Ray <divy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cxgb3/sge.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index bdbd14727e4b..318a018ca7c5 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -2079,6 +2079,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 			 struct sge_fl *fl, int len, int complete)
 {
 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
+	struct port_info *pi = netdev_priv(qs->netdev);
 	struct sk_buff *skb = NULL;
 	struct cpl_rx_pkt *cpl;
 	struct skb_frag_struct *rx_frag;
@@ -2116,11 +2117,18 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 
 	if (!nr_frags) {
 		offset = 2 + sizeof(struct cpl_rx_pkt);
-		qs->lro_va = sd->pg_chunk.va + 2;
-	}
-	len -= offset;
+		cpl = qs->lro_va = sd->pg_chunk.va + 2;
 
-	prefetch(qs->lro_va);
+		if ((pi->rx_offload & T3_RX_CSUM) &&
+		     cpl->csum_valid && cpl->csum == htons(0xffff)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
+		} else
+			skb->ip_summed = CHECKSUM_NONE;
+	} else
+		cpl = qs->lro_va;
+
+	len -= offset;
 
 	rx_frag += nr_frags;
 	rx_frag->page = sd->pg_chunk.page;
@@ -2136,12 +2144,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 		return;
 
 	skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	cpl = qs->lro_va;
 
 	if (unlikely(cpl->vlan_valid)) {
-		struct net_device *dev = qs->netdev;
-		struct port_info *pi = netdev_priv(dev);
 		struct vlan_group *grp = pi->vlan_grp;
 
 		if (likely(grp != NULL)) {

From d4ae20b3799e0b6fa0d832a645a422da9f239868 Mon Sep 17 00:00:00 2001
From: Jan Luebbe <jluebbe@debian.org>
Date: Mon, 8 Feb 2010 22:41:44 -0800
Subject: [PATCH 366/640] net/sched: Fix module name in Kconfig

The action modules have been prefixed with 'act_', but the Kconfig
description was not changed.

Signed-off-by: Jan Luebbe <jluebbe@debian.org>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 929218a47620..21f9c7678aa3 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -433,7 +433,7 @@ config NET_ACT_POLICE
 	  module.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called police.
+	  module will be called act_police.
 
 config NET_ACT_GACT
         tristate "Generic actions"
@@ -443,7 +443,7 @@ config NET_ACT_GACT
 	  accepting packets.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called gact.
+	  module will be called act_gact.
 
 config GACT_PROB
         bool "Probability support"
@@ -459,7 +459,7 @@ config NET_ACT_MIRRED
 	  other devices.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called mirred.
+	  module will be called act_mirred.
 
 config NET_ACT_IPT
         tristate "IPtables targets"
@@ -469,7 +469,7 @@ config NET_ACT_IPT
 	  classification.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called ipt.
+	  module will be called act_ipt.
 
 config NET_ACT_NAT
         tristate "Stateless NAT"
@@ -479,7 +479,7 @@ config NET_ACT_NAT
 	  netfilter for NAT unless you know what you are doing.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called nat.
+	  module will be called act_nat.
 
 config NET_ACT_PEDIT
         tristate "Packet Editing"
@@ -488,7 +488,7 @@ config NET_ACT_PEDIT
 	  Say Y here if you want to mangle the content of packets.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called pedit.
+	  module will be called act_pedit.
 
 config NET_ACT_SIMP
         tristate "Simple Example (Debug)"
@@ -502,7 +502,7 @@ config NET_ACT_SIMP
 	  If unsure, say N.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called simple.
+	  module will be called act_simple.
 
 config NET_ACT_SKBEDIT
         tristate "SKB Editing"
@@ -513,7 +513,7 @@ config NET_ACT_SKBEDIT
 	  If unsure, say N.
 
 	  To compile this code as a module, choose M here: the
-	  module will be called skbedit.
+	  module will be called act_skbedit.
 
 config NET_CLS_IND
 	bool "Incoming device classification"

From 3af26f58d1920d904da87c3897d23070fe2266b4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 8 Feb 2010 22:42:40 -0800
Subject: [PATCH 367/640] MAINTAINERS: networking drivers - Add git net-next
 tree

During the rc period, patches that are not bugfixes
should be done using the net-next tree.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 03f38c18f323..602022d2c7a5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3836,6 +3836,7 @@ NETWORKING DRIVERS
 L:	netdev@vger.kernel.org
 W:	http://www.linuxfoundation.org/en/Net
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git
 S:	Odd Fixes
 F:	drivers/net/
 F:	include/linux/if_*

From bcf4d812e66ee95f762b38063d654fd1ff7156b0 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 8 Feb 2010 22:44:18 -0800
Subject: [PATCH 368/640] drivers/net: Correct NULL test

Test the value that was just allocated rather than the previously tested one.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r@
expression *x;
expression e;
identifier l;
@@

if (x == NULL || ...) {
    ... when forall
    return ...; }
... when != goto l;
    when != x = e
    when != &x
*x == NULL
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ax88796.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ax88796.c b/drivers/net/ax88796.c
index 62d9c9cc5671..1dd4403247ca 100644
--- a/drivers/net/ax88796.c
+++ b/drivers/net/ax88796.c
@@ -921,7 +921,7 @@ static int ax_probe(struct platform_device *pdev)
  		size = (res->end - res->start) + 1;
 
 		ax->mem2 = request_mem_region(res->start, size, pdev->name);
-		if (ax->mem == NULL) {
+		if (ax->mem2 == NULL) {
 			dev_err(&pdev->dev, "cannot reserve registers\n");
 			ret = -ENXIO;
 			goto exit_mem1;

From 4c52228d1b83ef67d4fa381c2ade70122c9e3c34 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ursula.braun@de.ibm.com>
Date: Tue, 9 Feb 2010 09:46:07 +0100
Subject: [PATCH 369/640] [S390] qdio: continue polling for buffer state ERROR

Inbound traffic handling may hang if next buffer to check is in
state ERROR, polling is stopped and the final check for further
available inbound buffers disregards buffers in state ERROR.
This patch includes state ERROR when checking availability of
more inbound buffers.

Cc: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 999fe80c4051..5781c5d10cff 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -531,7 +531,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q)
 	qdio_siga_sync_q(q);
 	get_buf_state(q, q->first_to_check, &state, 0);
 
-	if (state == SLSB_P_INPUT_PRIMED)
+	if (state == SLSB_P_INPUT_PRIMED || state == SLSB_P_INPUT_ERROR)
 		/* more work coming */
 		return 0;
 

From 959153d34544b7237bad263e73a5abdf94fc7722 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Tue, 9 Feb 2010 09:46:08 +0100
Subject: [PATCH 370/640] [S390] qdio: prevent call trace if CHPID is offline

If a CHPID is offline during a device shutdown the ccw_device_halt|clear
may fail and the qdio device stays in state STOPPED until the shutdown is
finished. If an interrupt occurs before the device is set to INACTIVE
the STOPPED state triggers a WARN_ON in the interrupt handler.
Prevent this WARN_ON by catching the STOPPED state in the interrupt
handler.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/qdio_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 5781c5d10cff..62b654af9237 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -960,6 +960,8 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
 			qdio_handle_activate_check(cdev, intparm, cstat,
 						   dstat);
 		break;
+	case QDIO_IRQ_STATE_STOPPED:
+		break;
 	default:
 		WARN_ON(1);
 	}

From 7717aefff3290c61e5f9e6aa39e9e1dc63cd4e81 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 9 Feb 2010 09:46:09 +0100
Subject: [PATCH 371/640] [S390] Fix struct _lowcore layout.

Offsets and sizes are wrong for 32 bit.
Got broken with 866ba284 "[S390] cleanup lowcore.h".

Reported-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/lowcore.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index f2ef4b619ce1..c25dfac7dd76 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -293,12 +293,12 @@ struct _lowcore
 	__u64	clock_comparator;		/* 0x02d0 */
 	__u32	machine_flags;			/* 0x02d8 */
 	__u32	ftrace_func;			/* 0x02dc */
-	__u8	pad_0x02f0[0x0300-0x02f0];	/* 0x02f0 */
+	__u8	pad_0x02e0[0x0300-0x02e0];	/* 0x02e0 */
 
 	/* Interrupt response block */
 	__u8	irb[64];			/* 0x0300 */
 
-	__u8	pad_0x0400[0x0e00-0x0400];	/* 0x0400 */
+	__u8	pad_0x0340[0x0e00-0x0340];	/* 0x0340 */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information

From c93d89f3dbf0202bf19c07960ca8602b48c2f9a0 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 27 Jan 2010 19:13:40 +0800
Subject: [PATCH 372/640] Export the symbol of getboottime and
 mmonotonic_to_bootbased

Export getboottime and monotonic_to_bootbased in order to let them
could be used by following patch.

Cc: stable@kernel.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 kernel/time/timekeeping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7faaa32fbf4f..e2ab064c6d41 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -880,6 +880,7 @@ void getboottime(struct timespec *ts)
 
 	set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
 }
+EXPORT_SYMBOL_GPL(getboottime);
 
 /**
  * monotonic_to_bootbased - Convert the monotonic time to boot based.
@@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts)
 {
 	*ts = timespec_add_safe(*ts, total_sleep_time);
 }
+EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {

From 923de3cf5bf12049628019010e36623fca5ef6d1 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 27 Jan 2010 19:13:49 +0800
Subject: [PATCH 373/640] kvmclock: count total_sleep_time when updating guest
 clock

Current kvm wallclock does not consider the total_sleep_time which could cause
wrong wallclock in guest after host suspend/resume. This patch solve
this issue by counting total_sleep_time to get the correct host boot time.

Cc: stable@kernel.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Glauber Costa <glommer@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ddcad452add..a1e1bc9d412d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	static int version;
 	struct pvclock_wall_clock wc;
-	struct timespec now, sys, boot;
+	struct timespec boot;
 
 	if (!wall_clock)
 		return;
@@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	 * wall clock specified here.  guest system time equals host
 	 * system time for us, thus we must fill in host boot time here.
 	 */
-	now = current_kernel_time();
-	ktime_get_ts(&sys);
-	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+	getboottime(&boot);
 
 	wc.sec = boot.tv_sec;
 	wc.nsec = boot.tv_nsec;
@@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
 	ktime_get_ts(&ts);
+	monotonic_to_bootbased(&ts);
 	local_irq_restore(flags);
 
 	/* With all the info we got, fill in the values */

From ee73f656a604d5aa9df86a97102e4e462dd79924 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 29 Jan 2010 17:28:41 -0200
Subject: [PATCH 374/640] KVM: PIT: control word is write-only

PIT control word (address 0x43) is write-only, reads are undefined.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8254.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 296aba49472a..15578f180e59 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this,
 		return -EOPNOTSUPP;
 
 	addr &= KVM_PIT_CHANNEL_MASK;
+	if (addr == 3)
+		return 0;
+
 	s = &pit_state->channels[addr];
 
 	mutex_lock(&pit_state->lock);

From 6c8afef551fef87a3bf24f8a74c69a7f2f72fc82 Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Tue, 9 Feb 2010 10:07:00 +0530
Subject: [PATCH 375/640] ath9k: Fix sequence numbers for PAE frames

Currently, PAE frames are not assigned proper sequence numbers.
Since sending PAE frames as part of aggregates breaks
crupto with several APs, they are sent as normal MPDUs.
Fix the seqeuence number issue by updating the frame with the
internal sequence number.

Tested-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/xmit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index fa12b9060b0b..29bf33692f71 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1615,7 +1615,7 @@ static int ath_tx_setup_buffer(struct ieee80211_hw *hw, struct ath_buf *bf,
 		bf->bf_frmlen -= padsize;
 	}
 
-	if (conf_is_ht(&hw->conf) && !is_pae(skb))
+	if (conf_is_ht(&hw->conf))
 		bf->bf_state.bf_type |= BUF_HT;
 
 	bf->bf_flags = setup_tx_flags(sc, skb, txctl->txq);
@@ -1701,7 +1701,7 @@ static void ath_tx_start_dma(struct ath_softc *sc, struct ath_buf *bf,
 			goto tx_done;
 		}
 
-		if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) {
+		if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && !is_pae(skb)) {
 			/*
 			 * Try aggregation if it's a unicast data frame
 			 * and the destination is HT capable.

From 33a5d083e786f0c3fb4efedb59b0e8e3de39963b Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 9 Feb 2010 12:07:41 +0100
Subject: [PATCH 376/640] iwmc3200wifi: Test of wrong pointer after kzalloc in
 iwm_mlme_update_bss_table()

The wrong pointer was tested.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwmc3200wifi/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c
index 3db3d8b07491..64d16fe37f9a 100644
--- a/drivers/net/wireless/iwmc3200wifi/rx.c
+++ b/drivers/net/wireless/iwmc3200wifi/rx.c
@@ -794,7 +794,7 @@ static int iwm_mlme_update_bss_table(struct iwm_priv *iwm, u8 *buf,
 	}
 
 	bss->bss = kzalloc(bss_len, GFP_KERNEL);
-	if (!bss) {
+	if (!bss->bss) {
 		kfree(bss);
 		IWM_ERR(iwm, "Couldn't allocate bss\n");
 		return -ENOMEM;

From 2c1740098c708b465e87637b237feb2fd98f129a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 8 Feb 2010 09:32:27 -0500
Subject: [PATCH 377/640] NFS: Fix a bug in nfs_fscache_release_page()

Not having an fscache cookie is perfectly valid if the user didn't mount
with the fscache option.

This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=15234

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: stable@kernel.org
---
 fs/nfs/fscache.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index fa588006588d..237874f1af23 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
  */
 int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 {
-	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-	struct fscache_cookie *cookie = nfsi->fscache;
-
-	BUG_ON(!cookie);
-
 	if (PageFsCache(page)) {
+		struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+		struct fscache_cookie *cookie = nfsi->fscache;
+
+		BUG_ON(!cookie);
 		dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
 			 cookie, page, nfsi);
 

From 7549ad5f9b6eda49bbac4b14c5b8f37bf464f922 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 8 Feb 2010 09:32:34 -0500
Subject: [PATCH 378/640] NFS: Remove a redundant check for PageFsCache in
 nfs_migrate_page()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: David Howells <dhowells@redhat.com>
---
 fs/nfs/write.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b54b8bb101f..d63d964a0392 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1598,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 	struct nfs_page *req;
 	int ret;
 
-	if (PageFsCache(page))
-		nfs_fscache_release_page(page, GFP_KERNEL);
+	nfs_fscache_release_page(page, GFP_KERNEL);
 
 	req = nfs_find_and_lock_request(page);
 	ret = PTR_ERR(req);

From fdcb45777a3d1689c5541e1f85ee3ebbd197d2c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 8 Feb 2010 09:32:40 -0500
Subject: [PATCH 379/640] NFS: Fix the mapping of the NFSERR_SERVERFAULT error

It was recently pointed out that the NFSERR_SERVERFAULT error, which is
designed to inform the user of a serious internal error on the server, was
being mapped to an error value that is internal to the kernel.

This patch maps it to the error EREMOTEIO, which is exported to userland
through errno.h.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/mount_clnt.c | 2 +-
 fs/nfs/nfs2xdr.c    | 2 +-
 fs/nfs/nfs4xdr.c    | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0adefc40cc89..59047f8d7d72 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -120,7 +120,7 @@ static struct {
 	{ .status = MNT3ERR_INVAL,		.errno = -EINVAL,	},
 	{ .status = MNT3ERR_NAMETOOLONG,	.errno = -ENAMETOOLONG,	},
 	{ .status = MNT3ERR_NOTSUPP,		.errno = -ENOTSUPP,	},
-	{ .status = MNT3ERR_SERVERFAULT,	.errno = -ESERVERFAULT,	},
+	{ .status = MNT3ERR_SERVERFAULT,	.errno = -EREMOTEIO,	},
 };
 
 struct mountres {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5e078b222b4e..7bc2da8efd4a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -699,7 +699,7 @@ static struct {
 	{ NFSERR_BAD_COOKIE,	-EBADCOOKIE	},
 	{ NFSERR_NOTSUPP,	-ENOTSUPP	},
 	{ NFSERR_TOOSMALL,	-ETOOSMALL	},
-	{ NFSERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFSERR_SERVERFAULT,	-EREMOTEIO	},
 	{ NFSERR_BADTYPE,	-EBADTYPE	},
 	{ NFSERR_JUKEBOX,	-EJUKEBOX	},
 	{ -1,			-EIO		}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e437fd6a819f..5cd5184b56db 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	 * If the server returns different values for sessionID, slotID or
 	 * sequence number, the server is looney tunes.
 	 */
-	status = -ESERVERFAULT;
+	status = -EREMOTEIO;
 
 	if (memcmp(id.data, res->sr_session->sess_id.data,
 		   NFS4_MAX_SESSIONID_LEN)) {
@@ -5774,7 +5774,7 @@ static struct {
 	{ NFS4ERR_BAD_COOKIE,	-EBADCOOKIE	},
 	{ NFS4ERR_NOTSUPP,	-ENOTSUPP	},
 	{ NFS4ERR_TOOSMALL,	-ETOOSMALL	},
-	{ NFS4ERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFS4ERR_SERVERFAULT,	-EREMOTEIO	},
 	{ NFS4ERR_BADTYPE,	-EBADTYPE	},
 	{ NFS4ERR_LOCKED,	-EAGAIN		},
 	{ NFS4ERR_SYMLINK,	-ELOOP		},
@@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat)
 	}
 	if (stat <= 10000 || stat > 10100) {
 		/* The server is looney tunes. */
-		return -ESERVERFAULT;
+		return -EREMOTEIO;
 	}
 	/* If we cannot translate the error, the recovery routines should
 	 * handle it.

From fed08d036f2aabd8d0c684439de37f8ebec2bbc2 Mon Sep 17 00:00:00 2001
From: Jody Bruchon <jody@nctritech.com>
Date: Sat, 6 Feb 2010 10:46:26 -0500
Subject: [PATCH 380/640] ALSA: hda-intel: Avoid divide by zero crash

On my AMD780V chipset, hda_intel.c can crash the kernel with a divide by
zero
for as-yet unknown reasons. A simple check for zero prevents it, though
the problem that causes it remains. Since the workaround is harmless and
won't affect anyone except victims of this bug, it should be safe;
moreover,
because this crash can be triggered by a user-mode application, there are
denial of service implications on the systems affected by the bug without
the patch.

Signed-off-by: Jody Bruchon <jody@nctritech.com>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index b8faa6dc5abe..e767c3f395ab 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1893,6 +1893,12 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev)
 
 	if (!bdl_pos_adj[chip->dev_index])
 		return 1; /* no delayed ack */
+	if (azx_dev->period_bytes == 0) {
+		printk(KERN_WARNING
+		       "hda-intel: Divide by zero was avoided "
+		       "in azx_dev->period_bytes.\n");
+		return 0;
+	}
 	if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2)
 		return 0; /* NG - it's below the period boundary */
 	return 1; /* OK, it's fine */

From 39c9bfb453b748ce220ceefacbe2a5c19fabf67b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Feb 2010 10:22:29 +1000
Subject: [PATCH 381/640] drm/nv50: prevent multiple init tables being parsed
 at the same time

With DVI and DP plugged, the DVI clock change interrupts being run can
cause DP link training to fail.  This adds a spinlock around init table
parsing to prevent this.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bios.c | 17 +++++++++--------
 drivers/gpu/drm/nouveau/nouveau_bios.h |  2 ++
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index fb4793e65ff2..2cd0fad17dac 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -3765,7 +3765,6 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct init_exec iexec = {true, false};
 	struct nvbios *bios = &dev_priv->VBIOS;
 	uint8_t *table = &bios->data[bios->display.script_table_ptr];
 	uint8_t *otable = NULL;
@@ -3845,8 +3844,6 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 	}
 
-	bios->display.output = dcbent;
-
 	if (pxclk == 0) {
 		script = ROM16(otable[6]);
 		if (!script) {
@@ -3855,7 +3852,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_TRACE(dev, "0x%04X: parsing output script 0\n", script);
-		parse_init_table(bios, script, &iexec);
+		nouveau_bios_run_init_table(dev, script, dcbent);
 	} else
 	if (pxclk == -1) {
 		script = ROM16(otable[8]);
@@ -3865,7 +3862,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_TRACE(dev, "0x%04X: parsing output script 1\n", script);
-		parse_init_table(bios, script, &iexec);
+		nouveau_bios_run_init_table(dev, script, dcbent);
 	} else
 	if (pxclk == -2) {
 		if (table[4] >= 12)
@@ -3878,7 +3875,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_TRACE(dev, "0x%04X: parsing output script 2\n", script);
-		parse_init_table(bios, script, &iexec);
+		nouveau_bios_run_init_table(dev, script, dcbent);
 	} else
 	if (pxclk > 0) {
 		script = ROM16(otable[table[4] + i*6 + 2]);
@@ -3890,7 +3887,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_TRACE(dev, "0x%04X: parsing clock script 0\n", script);
-		parse_init_table(bios, script, &iexec);
+		nouveau_bios_run_init_table(dev, script, dcbent);
 	} else
 	if (pxclk < 0) {
 		script = ROM16(otable[table[4] + i*6 + 4]);
@@ -3902,7 +3899,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_TRACE(dev, "0x%04X: parsing clock script 1\n", script);
-		parse_init_table(bios, script, &iexec);
+		nouveau_bios_run_init_table(dev, script, dcbent);
 	}
 
 	return 0;
@@ -5864,10 +5861,13 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvbios *bios = &dev_priv->VBIOS;
 	struct init_exec iexec = { true, false };
+	unsigned long flags;
 
+	spin_lock_irqsave(&bios->lock, flags);
 	bios->display.output = dcbent;
 	parse_init_table(bios, table, &iexec);
 	bios->display.output = NULL;
+	spin_unlock_irqrestore(&bios->lock, flags);
 }
 
 static bool NVInitVBIOS(struct drm_device *dev)
@@ -5876,6 +5876,7 @@ static bool NVInitVBIOS(struct drm_device *dev)
 	struct nvbios *bios = &dev_priv->VBIOS;
 
 	memset(bios, 0, sizeof(struct nvbios));
+	spin_lock_init(&bios->lock);
 	bios->dev = dev;
 
 	if (!NVShadowVBIOS(dev, bios->data))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 058e98c76d89..68446fd4146b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -205,6 +205,8 @@ struct nvbios {
 	struct drm_device *dev;
 	struct nouveau_bios_info pub;
 
+	spinlock_t lock;
+
 	uint8_t data[NV_PROM_SIZE];
 	unsigned int length;
 	bool execute;

From 5025b43120b629bdf11087a3c652dc9cbe172191 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Feb 2010 12:30:35 +1000
Subject: [PATCH 382/640] drm/nv50: disregard dac outputs in nv50_sor_dpms()

Fixes DVI+VGA on my 9400, and likely a lot of other configurations that
got broken by the previos DVI-over-DP fix.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_sor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index ecf1936b8224..c2fff543b06f 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -101,6 +101,7 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
 		struct nouveau_encoder *nvenc = nouveau_encoder(enc);
 
 		if (nvenc == nv_encoder ||
+		    nvenc->disconnect != nv50_sor_disconnect ||
 		    nvenc->dcb->or != nv_encoder->dcb->or)
 			continue;
 

From eb1dba0ebaa5b7642b323fac148f9947522a48a8 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sun, 27 Dec 2009 12:22:07 +0100
Subject: [PATCH 383/640] drm/nv50: align size of buffer object to the right
 boundaries.

- In the current situation the padding that is added is dangerous to write
  to, userspace could potentially overwrite parts of another bo.
- Depth and stencil buffers are supposed to be large enough in general so
  the waste of memory should be acceptable.
- Alternatives are hiding the padding from users or splitting vram into 2
  zones.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index db0ed4c13f98..028719fddf76 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -65,8 +65,10 @@ nouveau_bo_fixup_align(struct drm_device *dev,
 
 	/*
 	 * Some of the tile_flags have a periodic structure of N*4096 bytes,
-	 * align to to that as well as the page size. Overallocate memory to
-	 * avoid corruption of other buffer objects.
+	 * align to to that as well as the page size. Align the size to the
+	 * appropriate boundaries. This does imply that sizes are rounded up
+	 * 3-7 pages, so be aware of this and do not waste memory by allocating
+	 * many small buffers.
 	 */
 	if (dev_priv->card_type == NV_50) {
 		uint32_t block_size = nouveau_mem_fb_amount(dev) >> 15;
@@ -77,22 +79,20 @@ nouveau_bo_fixup_align(struct drm_device *dev,
 		case 0x2800:
 		case 0x4800:
 		case 0x7a00:
-			*size = roundup(*size, block_size);
 			if (is_power_of_2(block_size)) {
-				*size += 3 * block_size;
 				for (i = 1; i < 10; i++) {
 					*align = 12 * i * block_size;
 					if (!(*align % 65536))
 						break;
 				}
 			} else {
-				*size += 6 * block_size;
 				for (i = 1; i < 10; i++) {
 					*align = 8 * i * block_size;
 					if (!(*align % 65536))
 						break;
 				}
 			}
+			*size = roundup(*size, *align);
 			break;
 		default:
 			break;

From a51a3bf50d41708388f51ce63d965c0e77726eab Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Mon, 1 Feb 2010 18:32:09 +0100
Subject: [PATCH 384/640] drm/nv50: avoid unloading pgraph context when ctxprog
 is running

- We need to disable pgraph fifo access before checking the current channel,
  otherwise we could still hit a running ctxprog.
- The writes to 0x400500 are already handled by pgraph->fifo_access and are
  therefore redundant, moreover pgraph fifo access should not be reenabled
  before current context is set as invalid. So remove them altogether.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_channel.c |  7 +++----
 drivers/gpu/drm/nouveau/nv50_graph.c      | 10 +++++++---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 343d718a9667..2281f99da7fc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -278,12 +278,11 @@ nouveau_channel_free(struct nouveau_channel *chan)
 	/* Ensure the channel is no longer active on the GPU */
 	pfifo->reassign(dev, false);
 
-	if (pgraph->channel(dev) == chan) {
-		pgraph->fifo_access(dev, false);
+	pgraph->fifo_access(dev, false);
+	if (pgraph->channel(dev) == chan)
 		pgraph->unload_context(dev);
-		pgraph->fifo_access(dev, true);
-	}
 	pgraph->destroy_context(chan);
+	pgraph->fifo_access(dev, true);
 
 	if (pfifo->channel_id(dev) == chan->id) {
 		pfifo->disable(dev);
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 20319e59d368..6d504801b514 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -165,6 +165,12 @@ nv50_graph_channel(struct drm_device *dev)
 	uint32_t inst;
 	int i;
 
+	/* Be sure we're not in the middle of a context switch or bad things
+	 * will happen, such as unloading the wrong pgraph context.
+	 */
+	if (!nv_wait(0x400300, 0x00000001, 0x00000000))
+		NV_ERROR(dev, "Ctxprog is still running\n");
+
 	inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
 	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
 		return NULL;
@@ -275,7 +281,7 @@ nv50_graph_load_context(struct nouveau_channel *chan)
 int
 nv50_graph_unload_context(struct drm_device *dev)
 {
-	uint32_t inst, fifo = nv_rd32(dev, 0x400500);
+	uint32_t inst;
 
 	inst  = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
 	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
@@ -283,12 +289,10 @@ nv50_graph_unload_context(struct drm_device *dev)
 	inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE;
 
 	nouveau_wait_for_idle(dev);
-	nv_wr32(dev, 0x400500, fifo & ~1);
 	nv_wr32(dev, 0x400784, inst);
 	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20);
 	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 0x01);
 	nouveau_wait_for_idle(dev);
-	nv_wr32(dev, 0x400500, fifo);
 
 	nv_wr32(dev, NV50_PGRAPH_CTXCTL_CUR, inst);
 	return 0;

From a87ff62a80a6a65fc664cd410061910b8c52b896 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Mon, 1 Feb 2010 18:47:52 +0100
Subject: [PATCH 385/640] drm/nv50: delete ramfc object after disabling fifo,
 not before

ramfc is zero'ed upon destruction, so it's safer to do things in the right
order.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_fifo.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 32b244bcb482..204a79ff10f4 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -317,17 +317,20 @@ void
 nv50_fifo_destroy_context(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj_ref *ramfc = chan->ramfc;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	nouveau_gpuobj_ref_del(dev, &chan->ramfc);
-	nouveau_gpuobj_ref_del(dev, &chan->cache);
-
+	/* This will ensure the channel is seen as disabled. */
+	chan->ramfc = NULL;
 	nv50_fifo_channel_disable(dev, chan->id, false);
 
 	/* Dummy channel, also used on ch 127 */
 	if (chan->id == 0)
 		nv50_fifo_channel_disable(dev, 127, false);
+
+	nouveau_gpuobj_ref_del(dev, &ramfc);
+	nouveau_gpuobj_ref_del(dev, &chan->cache);
 }
 
 int

From b1d37aa0aa43c5bf857364093ab2191acd37f2ec Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Wed, 20 Jan 2010 19:54:34 +0100
Subject: [PATCH 386/640] drm/nv50: make the pgraph irq handler loop like the
 pre-nv50 version

Unset the bit that indicates that a ctxprog can continue at the end.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_irq.c | 133 ++++++++++++++------------
 1 file changed, 73 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index baa9b3e0b66b..447f9f69d6b1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -580,86 +580,99 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
 static void
 nv50_pgraph_irq_handler(struct drm_device *dev)
 {
-	uint32_t status, nsource;
+	uint32_t status;
 
-	status = nv_rd32(dev, NV03_PGRAPH_INTR);
-	nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE);
+	while ((status = nv_rd32(dev, NV03_PGRAPH_INTR))) {
+		uint32_t nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE);
 
-	if (status & 0x00000001) {
-		nouveau_pgraph_intr_notify(dev, nsource);
-		status &= ~0x00000001;
-		nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000001);
-	}
+		if (status & 0x00000001) {
+			nouveau_pgraph_intr_notify(dev, nsource);
+			status &= ~0x00000001;
+			nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000001);
+		}
 
-	if (status & 0x00000010) {
-		nouveau_pgraph_intr_error(dev, nsource |
-					  NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD);
+		if (status & 0x00000010) {
+			nouveau_pgraph_intr_error(dev, nsource |
+					NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD);
 
-		status &= ~0x00000010;
-		nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000010);
-	}
+			status &= ~0x00000010;
+			nv_wr32(dev, NV03_PGRAPH_INTR, 0x00000010);
+		}
 
-	if (status & 0x00001000) {
-		nv_wr32(dev, 0x400500, 0x00000000);
-		nv_wr32(dev, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
-		nv_wr32(dev, NV40_PGRAPH_INTR_EN, nv_rd32(dev,
-			NV40_PGRAPH_INTR_EN) & ~NV_PGRAPH_INTR_CONTEXT_SWITCH);
-		nv_wr32(dev, 0x400500, 0x00010001);
+		if (status & 0x00001000) {
+			nv_wr32(dev, 0x400500, 0x00000000);
+			nv_wr32(dev, NV03_PGRAPH_INTR,
+				NV_PGRAPH_INTR_CONTEXT_SWITCH);
+			nv_wr32(dev, NV40_PGRAPH_INTR_EN, nv_rd32(dev,
+				NV40_PGRAPH_INTR_EN) &
+				~NV_PGRAPH_INTR_CONTEXT_SWITCH);
+			nv_wr32(dev, 0x400500, 0x00010001);
 
-		nv50_graph_context_switch(dev);
+			nv50_graph_context_switch(dev);
 
-		status &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
-	}
+			status &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
+		}
 
-	if (status & 0x00100000) {
-		nouveau_pgraph_intr_error(dev, nsource |
-					  NV03_PGRAPH_NSOURCE_DATA_ERROR);
+		if (status & 0x00100000) {
+			nouveau_pgraph_intr_error(dev, nsource |
+					NV03_PGRAPH_NSOURCE_DATA_ERROR);
 
-		status &= ~0x00100000;
-		nv_wr32(dev, NV03_PGRAPH_INTR, 0x00100000);
-	}
+			status &= ~0x00100000;
+			nv_wr32(dev, NV03_PGRAPH_INTR, 0x00100000);
+		}
 
-	if (status & 0x00200000) {
-		int r;
+		if (status & 0x00200000) {
+			int r;
 
-		nouveau_pgraph_intr_error(dev, nsource |
-					  NV03_PGRAPH_NSOURCE_PROTECTION_ERROR);
+			nouveau_pgraph_intr_error(dev, nsource |
+					NV03_PGRAPH_NSOURCE_PROTECTION_ERROR);
 
-		NV_ERROR(dev, "magic set 1:\n");
-		for (r = 0x408900; r <= 0x408910; r += 4)
-			NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r));
-		nv_wr32(dev, 0x408900, nv_rd32(dev, 0x408904) | 0xc0000000);
-		for (r = 0x408e08; r <= 0x408e24; r += 4)
-			NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r));
-		nv_wr32(dev, 0x408e08, nv_rd32(dev, 0x408e08) | 0xc0000000);
+			NV_ERROR(dev, "magic set 1:\n");
+			for (r = 0x408900; r <= 0x408910; r += 4)
+				NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r,
+					nv_rd32(dev, r));
+			nv_wr32(dev, 0x408900,
+				nv_rd32(dev, 0x408904) | 0xc0000000);
+			for (r = 0x408e08; r <= 0x408e24; r += 4)
+				NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r,
+							nv_rd32(dev, r));
+			nv_wr32(dev, 0x408e08,
+				nv_rd32(dev, 0x408e08) | 0xc0000000);
 
-		NV_ERROR(dev, "magic set 2:\n");
-		for (r = 0x409900; r <= 0x409910; r += 4)
-			NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r));
-		nv_wr32(dev, 0x409900, nv_rd32(dev, 0x409904) | 0xc0000000);
-		for (r = 0x409e08; r <= 0x409e24; r += 4)
-			NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r, nv_rd32(dev, r));
-		nv_wr32(dev, 0x409e08, nv_rd32(dev, 0x409e08) | 0xc0000000);
+			NV_ERROR(dev, "magic set 2:\n");
+			for (r = 0x409900; r <= 0x409910; r += 4)
+				NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r,
+					nv_rd32(dev, r));
+			nv_wr32(dev, 0x409900,
+				nv_rd32(dev, 0x409904) | 0xc0000000);
+			for (r = 0x409e08; r <= 0x409e24; r += 4)
+				NV_ERROR(dev, "\t0x%08x: 0x%08x\n", r,
+					nv_rd32(dev, r));
+			nv_wr32(dev, 0x409e08,
+				nv_rd32(dev, 0x409e08) | 0xc0000000);
 
-		status &= ~0x00200000;
-		nv_wr32(dev, NV03_PGRAPH_NSOURCE, nsource);
-		nv_wr32(dev, NV03_PGRAPH_INTR, 0x00200000);
-	}
+			status &= ~0x00200000;
+			nv_wr32(dev, NV03_PGRAPH_NSOURCE, nsource);
+			nv_wr32(dev, NV03_PGRAPH_INTR, 0x00200000);
+		}
 
-	if (status) {
-		NV_INFO(dev, "Unhandled PGRAPH_INTR - 0x%08x\n", status);
-		nv_wr32(dev, NV03_PGRAPH_INTR, status);
-	}
+		if (status) {
+			NV_INFO(dev, "Unhandled PGRAPH_INTR - 0x%08x\n",
+				status);
+			nv_wr32(dev, NV03_PGRAPH_INTR, status);
+		}
 
-	{
-		const int isb = (1 << 16) | (1 << 0);
+		{
+			const int isb = (1 << 16) | (1 << 0);
 
-		if ((nv_rd32(dev, 0x400500) & isb) != isb)
-			nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | isb);
-		nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31));
+			if ((nv_rd32(dev, 0x400500) & isb) != isb)
+				nv_wr32(dev, 0x400500,
+					nv_rd32(dev, 0x400500) | isb);
+		}
 	}
 
 	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING);
+	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31));
 }
 
 static void

From cf9db6c41f739a294286847aab1e85f39aef1781 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 8 Feb 2010 20:35:02 -0600
Subject: [PATCH 387/640] x86-32: Make AT_VECTOR_SIZE_ARCH=2

Both x86-32 and x86-64 with 32-bit compat use ARCH_DLINFO_IA32,
which defines two saved_auxv entries.  But system.h only defines
AT_VECTOR_SIZE_ARCH as 2 for CONFIG_IA32_EMULATION, not for
CONFIG_X86_32.  Fix that.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
LKML-Reference: <20100209023502.GA15408@us.ibm.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/system.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index ecb544e65382..e04740f7a0bb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -11,9 +11,9 @@
 #include <linux/irqflags.h>
 
 /* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
 # define AT_VECTOR_SIZE_ARCH 2
-#else
+#else /* else it's non-compat x86-64 */
 # define AT_VECTOR_SIZE_ARCH 1
 #endif
 

From f036d9f3985a529a81e582f68aa984eb7b20d54d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Feb 2010 16:18:40 -0800
Subject: [PATCH 388/640] sparc: Align clone and signal stacks to 16 bytes.

This is mandatory for 64-bit processes, and doing it also for 32-bit
processes saves a conditional in the compat case.

This fixes the glibc/nptl/tst-stdio1 test case, as well
as many others, on 64-bit.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/process_32.c |  2 +-
 arch/sparc/kernel/process_64.c |  8 ++++----
 arch/sparc/kernel/signal32.c   | 10 ++++++----
 arch/sparc/kernel/signal_32.c  |  6 ++++--
 arch/sparc/kernel/signal_64.c  |  8 +++++---
 5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 2830b415e214..f23c8fda6503 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -526,7 +526,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 			 * Set some valid stack frames to give to the child.
 			 */
 			childstack = (struct sparc_stackf __user *)
-				(sp & ~0x7UL);
+				(sp & ~0x15UL);
 			parentstack = (struct sparc_stackf __user *)
 				regs->u_regs[UREG_FP];
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 18d67854a1b8..6679eebfd2e8 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -406,11 +406,11 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
 	} else
 		__get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
 
-	/* Now 8-byte align the stack as this is mandatory in the
-	 * Sparc ABI due to how register windows work.  This hides
-	 * the restriction from thread libraries etc.  -DaveM
+	/* Now align the stack as this is mandatory in the Sparc ABI
+	 * due to how register windows work.  This hides the
+	 * restriction from thread libraries etc.
 	 */
-	csp &= ~7UL;
+	csp &= ~15UL;
 
 	distance = fp - psp;
 	rval = (csp - distance);
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index ba5b09ad6666..ea22cd373c64 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -120,8 +120,8 @@ struct rt_signal_frame32 {
 };
 
 /* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 7) & (~7)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 7) & (~7)))
+#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
+#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
@@ -420,15 +420,17 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
+	sp -= framesize;
+
 	/* Always align the stack frame.  This handles two cases.  First,
 	 * sigaltstack need not be mindful of platform specific stack
 	 * alignment.  Second, if we took this signal because the stack
 	 * is not aligned properly, we'd like to take the signal cleanly
 	 * and report that.
 	 */
-	sp &= ~7UL;
+	sp &= ~15UL;
 
-	return (void __user *)(sp - framesize);
+	return (void __user *) sp;
 }
 
 static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 7ce1a1005b1d..9882df92ba0a 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -267,15 +267,17 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
+	sp -= framesize;
+
 	/* Always align the stack frame.  This handles two cases.  First,
 	 * sigaltstack need not be mindful of platform specific stack
 	 * alignment.  Second, if we took this signal because the stack
 	 * is not aligned properly, we'd like to take the signal cleanly
 	 * and report that.
 	 */
-	sp &= ~7UL;
+	sp &= ~15UL;
 
-	return (void __user *)(sp - framesize);
+	return (void __user *) sp;
 }
 
 static inline int
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 647afbda7ae1..9fa48c30037e 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -353,7 +353,7 @@ segv:
 /* Checks if the fp is valid */
 static int invalid_frame_pointer(void __user *fp, int fplen)
 {
-	if (((unsigned long) fp) & 7)
+	if (((unsigned long) fp) & 15)
 		return 1;
 	return 0;
 }
@@ -396,15 +396,17 @@ static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
+	sp -= framesize;
+
 	/* Always align the stack frame.  This handles two cases.  First,
 	 * sigaltstack need not be mindful of platform specific stack
 	 * alignment.  Second, if we took this signal because the stack
 	 * is not aligned properly, we'd like to take the signal cleanly
 	 * and report that.
 	 */
-	sp &= ~7UL;
+	sp &= ~15UL;
 
-	return (void __user *)(sp - framesize);
+	return (void __user *) sp;
 }
 
 static inline void

From ef286f6fa673cd7fb367e1b145069d8dbfcc6081 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 9 Feb 2010 16:34:14 +1100
Subject: [PATCH 389/640] md: fix some lockdep issues between md and sysfs.

======
This fix is related to
    http://bugzilla.kernel.org/show_bug.cgi?id=15142
but does not address that exact issue.
======

sysfs does like attributes being removed while they are being accessed
(i.e. read or written) and waits for the access to complete.

As accessing some md attributes takes the same lock that is held while
removing those attributes a deadlock can occur.

This patch addresses 3 issues in md that could lead to this deadlock.

Two relate to calling flush_scheduled_work while the lock is held.
This is probably a bad idea in general and as we use schedule_work to
delete various sysfs objects it is particularly bad.

In one case flush_scheduled_work is called from md_alloc (called by
md_probe) called from do_md_run which holds the lock.  This call is
only present to ensure that ->gendisk is set.  However we can be sure
that gendisk is always set (though possibly we couldn't when that code
was originally written.  This is because do_md_run is called in three
different contexts:
  1/ from md_ioctl.  This requires that md_open has succeeded, and it
     fails if ->gendisk is not set.
  2/ from writing a sysfs attribute.  This can only happen if the
     mddev has been registered in sysfs which happens in md_alloc
     after ->gendisk has been set.
  3/ from autorun_array which is only called by autorun_devices, which
     checks for ->gendisk to be set before calling autorun_array.
So the call to md_probe in do_md_run can be removed, and the check on
->gendisk can also go.


In the other case flush_scheduled_work is being called in do_md_stop,
purportedly to wait for all md_delayed_delete calls (which delete the
component rdevs) to complete.  However there really isn't any need to
wait for them - they have already been disconnected in all important
ways.

The third issue is that raid5->stop() removes some attribute names
while the lock is held.  There is already some infrastructure in place
to delay attribute removal until after the lock is released (using
schedule_work).  So extend that infrastructure to remove the
raid5_attrs_group.

This does not address all lockdep issues related to the sysfs
"s_active" lock.  The rest can be address by splitting that lockdep
context between symlinks and non-symlinks which hopefully will happen.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c    | 14 +++++---------
 drivers/md/raid5.c |  3 +--
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index dd3dfe42d5a9..a20a71e5efd3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4075,8 +4075,10 @@ static void mddev_delayed_delete(struct work_struct *ws)
 {
 	mddev_t *mddev = container_of(ws, mddev_t, del_work);
 
-	if (mddev->private == &md_redundancy_group) {
+	if (mddev->private) {
 		sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
+		if (mddev->private != (void*)1)
+			sysfs_remove_group(&mddev->kobj, mddev->private);
 		if (mddev->sysfs_action)
 			sysfs_put(mddev->sysfs_action);
 		mddev->sysfs_action = NULL;
@@ -4287,10 +4289,7 @@ static int do_md_run(mddev_t * mddev)
 		sysfs_notify_dirent(rdev->sysfs_state);
 	}
 
-	md_probe(mddev->unit, NULL, NULL);
 	disk = mddev->gendisk;
-	if (!disk)
-		return -ENOMEM;
 
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
@@ -4530,8 +4529,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 			mddev->queue->unplug_fn = NULL;
 			mddev->queue->backing_dev_info.congested_fn = NULL;
 			module_put(mddev->pers->owner);
-			if (mddev->pers->sync_request)
-				mddev->private = &md_redundancy_group;
+			if (mddev->pers->sync_request && mddev->private == NULL)
+				mddev->private = (void*)1;
 			mddev->pers = NULL;
 			/* tell userspace to handle 'inactive' */
 			sysfs_notify_dirent(mddev->sysfs_state);
@@ -4578,9 +4577,6 @@ out:
 		}
 		mddev->bitmap_info.offset = 0;
 
-		/* make sure all md_delayed_delete calls have finished */
-		flush_scheduled_work();
-
 		export_array(mddev);
 
 		mddev->array_sectors = 0;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b5629c3e14fa..ceb24afdc147 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5136,9 +5136,8 @@ static int stop(mddev_t *mddev)
 	mddev->thread = NULL;
 	mddev->queue->backing_dev_info.congested_fn = NULL;
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
-	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 	free_conf(conf);
-	mddev->private = NULL;
+	mddev->private = &raid5_attrs_group;
 	return 0;
 }
 

From 77058e1adcc439151db41f2b84e4867a88113cd8 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Mon, 8 Feb 2010 20:09:03 +0000
Subject: [PATCH 390/640] powerpc: Fix address masking bug in hpte_need_flush()

Commit f71dc176aa06359681c30ba6877ffccab6fba3a6 'Make
hpte_need_flush() correctly mask for multiple page sizes' introduced
bug, which is triggered when a kernel with a 64k base page size is run
on a system whose hardware does not 64k hash PTEs.  In this case, we
emulate 64k pages with multiple 4k hash PTEs, however in
hpte_need_flush() we incorrectly only mask the hardware page size from
the address, instead of the logical page size.  This causes things to
go wrong when we later attempt to iterate through the hardware
subpages of the logical page.

This patch corrects the error.  It has been tested on pSeries bare
metal by Michael Neuling.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/tlb_hash64.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 282d9306361f..1ec06576f619 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -63,15 +63,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	if (huge) {
 #ifdef CONFIG_HUGETLB_PAGE
 		psize = get_slice_psize(mm, addr);
+		/* Mask the address for the correct page size */
+		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
 #else
 		BUG();
 		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
 #endif
-	} else
+	} else {
 		psize = pte_pagesize_index(mm, addr, pte);
+		/* Mask the address for the standard page size.  If we
+		 * have a 64k page kernel, but the hardware does not
+		 * support 64k pages, this might be different from the
+		 * hardware page size encoded in the slice table. */
+		addr &= PAGE_MASK;
+	}
 
-	/* Mask the address for the correct page size */
-	addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
 
 	/* Build full vaddr */
 	if (!is_kernel_addr(addr)) {

From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 9 Feb 2010 18:01:44 -0800
Subject: [PATCH 391/640] x86, apic: Don't use logical-flat mode when CPU
 hotplug may exceed 8 CPUs

We need to fall back from logical-flat APIC mode to physical-flat mode
when we have more than 8 CPUs.  However, in the presence of CPU
hotplug(with bios listing not enabled but possible cpus as disabled cpus in
MADT), we have to consider the number of possible CPUs rather than
the number of current CPUs; otherwise we may cross the 8-CPU boundary
when CPUs are added later.

32bit apic code can use more cleanups (like the removal of vendor checks in
32bit default_setup_apic_routing()) and more unifications with 64bit code.
Yinghai has some patches in works already. This patch addresses the boot issue
that is reported in the virtualization guest context.

[ hpa: incorporated function annotation feedback from Yinghai Lu ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com>
Acked-by: Shaohui Zheng <shaohui.zheng@intel.com>
Reviewed-by: Yinghai Lu <yinghai@kernel.org>
Cc: <stable@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/acpi/boot.c     |  5 -----
 arch/x86/kernel/apic/apic.c     | 17 -----------------
 arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++--
 arch/x86/kernel/apic/probe_64.c |  2 +-
 arch/x86/kernel/mpparse.c       |  7 -------
 arch/x86/kernel/smpboot.c       |  2 --
 6 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 036d28adf59d..0acbcdfa5ca4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1185,9 +1185,6 @@ static void __init acpi_process_madt(void)
 		if (!error) {
 			acpi_lapic = 1;
 
-#ifdef CONFIG_X86_BIGSMP
-			generic_bigsmp_probe();
-#endif
 			/*
 			 * Parse MADT IO-APIC entries
 			 */
@@ -1197,8 +1194,6 @@ static void __init acpi_process_madt(void)
 				acpi_ioapic = 1;
 
 				smp_found_config = 1;
-				if (apic->setup_apic_routing)
-					apic->setup_apic_routing();
 			}
 		}
 		if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3987e4408f75..dfca210f6a10 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void)
 #endif
 
 	enable_IR_x2apic();
-#ifdef CONFIG_X86_64
 	default_setup_apic_routing();
-#endif
 
 	verify_local_APIC();
 	connect_bsp_APIC();
@@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	if (apicid > max_physical_apicid)
 		max_physical_apicid = apicid;
 
-#ifdef CONFIG_X86_32
-	if (num_processors > 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1a6559f6768c..99d2fe016084 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void)
 }
 late_initcall(print_ipi_mode);
 
-void default_setup_apic_routing(void)
+void __init default_setup_apic_routing(void)
+{
+	int version = apic_version[boot_cpu_physical_apicid];
+
+	if (num_possible_cpus() > 8) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (!APIC_XAPIC(version)) {
+				def_to_bigsmp = 0;
+				break;
+			}
+			/* If P4 and above fall through */
+		case X86_VENDOR_AMD:
+			def_to_bigsmp = 1;
+		}
+	}
+
+#ifdef CONFIG_X86_BIGSMP
+	generic_bigsmp_probe();
+#endif
+
+	if (apic->setup_apic_routing)
+		apic->setup_apic_routing();
+}
+
+static void setup_apic_flat_routing(void)
 {
 #ifdef CONFIG_X86_IO_APIC
 	printk(KERN_INFO
@@ -103,7 +128,7 @@ struct apic apic_default = {
 	.init_apic_ldr			= default_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
-	.setup_apic_routing		= default_setup_apic_routing,
+	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= default_apicid_to_node,
 	.cpu_to_logical_apicid		= default_cpu_to_logical_apicid,
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 450fe2064a14..83e9be4778e2 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void)
 	}
 #endif
 
-	if (apic == &apic_flat && num_processors > 8)
+	if (apic == &apic_flat && num_possible_cpus() > 8)
 			apic = &apic_physflat;
 
 	printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 40b54ceb68b5..a2c1edd2d3ac 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 		x86_init.mpparse.mpc_record(1);
 	}
 
-#ifdef CONFIG_X86_BIGSMP
-	generic_bigsmp_probe();
-#endif
-
-	if (apic->setup_apic_routing)
-		apic->setup_apic_routing();
-
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
 	return num_processors;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 678d0b8c26f3..b4e870cbdc60 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1083,9 +1083,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_cpu_sibling_map(0);
 
 	enable_IR_x2apic();
-#ifdef CONFIG_X86_64
 	default_setup_apic_routing();
-#endif
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");

From f79f11852831ba8837e82b73364e6f1cd0145499 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 10 Feb 2010 16:14:04 +0100
Subject: [PATCH 392/640] compat_ioctl: ignore RAID_VERSION ioctl

md ioctls are now handled by the md driver itself, but mdadm
may call RAID_VERSION on other devices as well. Mark the command
as IGNORE_IOCTL so this fails silently rather than printing
an annoying message.

Reported-by: "Michael S. Tsirkin" <m.s.tsirkin@gmail.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c5c45de1a2ee..b6f23b25370e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1038,6 +1038,8 @@ COMPATIBLE_IOCTL(FIOQSIZE)
 #ifdef CONFIG_BLOCK
 /* loop */
 IGNORE_IOCTL(LOOP_CLR_FD)
+/* md calls this on random blockdevs */
+IGNORE_IOCTL(RAID_VERSION)
 /* SG stuff */
 COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
 COMPATIBLE_IOCTL(SG_GET_TIMEOUT)

From a6c7fdd29350a74ba5f76809436de9c3d6763009 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nokia.com>
Date: Thu, 4 Feb 2010 13:06:59 +0200
Subject: [PATCH 393/640] OMAP: hsmmc: fix memory leak

The platform data allocated with kmalloc() will become unreachable once
the init is complete, so it should be freed. The problem was discovered
by kmemleak.

Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Acked-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/mmc-twl4030.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/mmc-twl4030.c b/arch/arm/mach-omap2/mmc-twl4030.c
index 0c3c72d934bf..8afe9dd3f150 100644
--- a/arch/arm/mach-omap2/mmc-twl4030.c
+++ b/arch/arm/mach-omap2/mmc-twl4030.c
@@ -408,6 +408,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 {
 	struct twl4030_hsmmc_info *c;
 	int nr_hsmmc = ARRAY_SIZE(hsmmc_data);
+	int i;
 
 	if (cpu_is_omap2430()) {
 		control_pbias_offset = OMAP243X_CONTROL_PBIAS_LITE;
@@ -434,7 +435,7 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 		mmc = kzalloc(sizeof(struct omap_mmc_platform_data), GFP_KERNEL);
 		if (!mmc) {
 			pr_err("Cannot allocate memory for mmc device!\n");
-			return;
+			goto done;
 		}
 
 		if (c->name)
@@ -532,6 +533,10 @@ void __init twl4030_mmc_init(struct twl4030_hsmmc_info *controllers)
 			continue;
 		c->dev = mmc->dev;
 	}
+
+done:
+	for (i = 0; i < nr_hsmmc; i++)
+		kfree(hsmmc_data[i]);
 }
 
 #endif

From 174b24963eaf96dc5e093502ee09639aed13eb2f Mon Sep 17 00:00:00 2001
From: Jelle Martijn Kok <jmkok@solutionsradio.com>
Date: Wed, 10 Feb 2010 09:34:09 -0600
Subject: [PATCH 394/640] rtl8187: Add new device ID

Add new RTL8187B device.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@kernel.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rtl818x/rtl8187_dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c
index bc5726dd5fe4..7ba3052b0708 100644
--- a/drivers/net/wireless/rtl818x/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c
@@ -65,6 +65,7 @@ static struct usb_device_id rtl8187_table[] __devinitdata = {
 	/* Sitecom */
 	{USB_DEVICE(0x0df6, 0x000d), .driver_info = DEVICE_RTL8187},
 	{USB_DEVICE(0x0df6, 0x0028), .driver_info = DEVICE_RTL8187B},
+	{USB_DEVICE(0x0df6, 0x0029), .driver_info = DEVICE_RTL8187B},
 	/* Sphairon Access Systems GmbH */
 	{USB_DEVICE(0x114B, 0x0150), .driver_info = DEVICE_RTL8187},
 	/* Dick Smith Electronics */

From d6cade0f7f40834ff3b48f2469d00b1be0ea0db6 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Tue, 9 Feb 2010 23:37:54 +0000
Subject: [PATCH 395/640] via-velocity: Remove unused IRQ status parameter from
 rx_srv and tx_srv

Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/via-velocity.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index c93f58f5c6f2..133069738ba0 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1877,13 +1877,12 @@ static void velocity_error(struct velocity_info *vptr, int status)
 /**
  *	tx_srv		-	transmit interrupt service
  *	@vptr; Velocity
- *	@status:
  *
  *	Scan the queues looking for transmitted packets that
  *	we can complete and clean up. Update any statistics as
  *	necessary/
  */
-static int velocity_tx_srv(struct velocity_info *vptr, u32 status)
+static int velocity_tx_srv(struct velocity_info *vptr)
 {
 	struct tx_desc *td;
 	int qnum;
@@ -2090,14 +2089,12 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 /**
  *	velocity_rx_srv		-	service RX interrupt
  *	@vptr: velocity
- *	@status: adapter status (unused)
  *
  *	Walk the receive ring of the velocity adapter and remove
  *	any received packets from the receive queue. Hand the ring
  *	slots back to the adapter for reuse.
  */
-static int velocity_rx_srv(struct velocity_info *vptr, int status,
-		int budget_left)
+static int velocity_rx_srv(struct velocity_info *vptr, int budget_left)
 {
 	struct net_device_stats *stats = &vptr->dev->stats;
 	int rd_curr = vptr->rx.curr;
@@ -2165,10 +2162,10 @@ static int velocity_poll(struct napi_struct *napi, int budget)
 	 * Do rx and tx twice for performance (taken from the VIA
 	 * out-of-tree driver).
 	 */
-	rx_done = velocity_rx_srv(vptr, isr_status, budget / 2);
-	velocity_tx_srv(vptr, isr_status);
-	rx_done += velocity_rx_srv(vptr, isr_status, budget - rx_done);
-	velocity_tx_srv(vptr, isr_status);
+	rx_done = velocity_rx_srv(vptr, budget / 2);
+	velocity_tx_srv(vptr);
+	rx_done += velocity_rx_srv(vptr, budget - rx_done);
+	velocity_tx_srv(vptr);
 
 	spin_unlock(&vptr->lock);
 
@@ -3100,7 +3097,7 @@ static int velocity_resume(struct pci_dev *pdev)
 	velocity_init_registers(vptr, VELOCITY_INIT_WOL);
 	mac_disable_int(vptr->mac_regs);
 
-	velocity_tx_srv(vptr, 0);
+	velocity_tx_srv(vptr);
 
 	for (i = 0; i < vptr->tx.numq; i++) {
 		if (vptr->tx.used[i])

From 39c2ff43ea3830ccc693f965abdace96e514b1c5 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Tue, 9 Feb 2010 23:38:07 +0000
Subject: [PATCH 396/640] via-velocity: Take spinlock on set coalesce

velocity_set_coalesce touches ISR and some other sensitive registers not
covered by the rtnl lock, so take the velocity spinlock.

Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/via-velocity.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 133069738ba0..54bafdab1f9d 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -3341,6 +3341,7 @@ static int velocity_set_coalesce(struct net_device *dev,
 {
 	struct velocity_info *vptr = netdev_priv(dev);
 	int max_us = 0x3f * 64;
+	unsigned long flags;
 
 	/* 6 bits of  */
 	if (ecmd->tx_coalesce_usecs > max_us)
@@ -3362,6 +3363,7 @@ static int velocity_set_coalesce(struct net_device *dev,
 			ecmd->tx_coalesce_usecs);
 
 	/* Setup the interrupt suppression and queue timers */
+	spin_lock_irqsave(&vptr->lock, flags);
 	mac_disable_int(vptr->mac_regs);
 	setup_adaptive_interrupts(vptr);
 	setup_queue_timers(vptr);
@@ -3369,6 +3371,7 @@ static int velocity_set_coalesce(struct net_device *dev,
 	mac_write_int_mask(vptr->int_mask, vptr->mac_regs);
 	mac_clear_isr(vptr->mac_regs);
 	mac_enable_int(vptr->mac_regs);
+	spin_unlock_irqrestore(&vptr->lock, flags);
 
 	return 0;
 }

From 3f2e8d9f13246382fbda6f03178eef867a9bfbe2 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Tue, 9 Feb 2010 23:38:25 +0000
Subject: [PATCH 397/640] via-velocity: Fix races on shared interrupts

This patch fixes two potential races in the velocity driver:

* Move the ACK and error handler to the interrupt handler. This fixes a
  potential race with shared interrupts when the other device interrupts
  before the NAPI poll handler has finished. As the velocity driver hasn't
  acked it's own interrupt, it will then steal the interrupt from the
  other device.

* Use spin_lock_irqsave in velocity_poll. In the current code, the
  interrupt handler will deadlock if e.g., the NAPI poll handler is
  executing when an interrupt (for another device) comes in since it
  tries to take the already held lock.

  Also unlock the spinlock only after enabling the interrupt in
  velocity_poll.

The error path is moved to the interrupt handler since this is where the
ISR is checked now.

Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: Anders Grafstrom <anders.grafstrom@netinsight.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/via-velocity.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 54bafdab1f9d..317aa34b21cf 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2148,16 +2148,9 @@ static int velocity_poll(struct napi_struct *napi, int budget)
 	struct velocity_info *vptr = container_of(napi,
 			struct velocity_info, napi);
 	unsigned int rx_done;
-	u32 isr_status;
-
-	spin_lock(&vptr->lock);
-	isr_status = mac_read_isr(vptr->mac_regs);
-
-	/* Ack the interrupt */
-	mac_write_isr(vptr->mac_regs, isr_status);
-	if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
-		velocity_error(vptr, isr_status);
+	unsigned long flags;
 
+	spin_lock_irqsave(&vptr->lock, flags);
 	/*
 	 * Do rx and tx twice for performance (taken from the VIA
 	 * out-of-tree driver).
@@ -2167,13 +2160,12 @@ static int velocity_poll(struct napi_struct *napi, int budget)
 	rx_done += velocity_rx_srv(vptr, budget - rx_done);
 	velocity_tx_srv(vptr);
 
-	spin_unlock(&vptr->lock);
-
 	/* If budget not fully consumed, exit the polling mode */
 	if (rx_done < budget) {
 		napi_complete(napi);
 		mac_enable_int(vptr->mac_regs);
 	}
+	spin_unlock_irqrestore(&vptr->lock, flags);
 
 	return rx_done;
 }
@@ -2203,10 +2195,17 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance)
 		return IRQ_NONE;
 	}
 
+	/* Ack the interrupt */
+	mac_write_isr(vptr->mac_regs, isr_status);
+
 	if (likely(napi_schedule_prep(&vptr->napi))) {
 		mac_disable_int(vptr->mac_regs);
 		__napi_schedule(&vptr->napi);
 	}
+
+	if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
+		velocity_error(vptr, isr_status);
+
 	spin_unlock(&vptr->lock);
 
 	return IRQ_HANDLED;

From 8f98781e0f15207b6ab33bee1fae05428be0475b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 10 Feb 2010 17:32:38 +0100
Subject: [PATCH 398/640] async-tx: fix buffer submission error handling in
 ipu_idma.c

If submitting new buffer failed, a wrong descriptor gets completed and it
doesn't check, if a callback is at all defined, which can lead to an Oops. Fix
these bugs and make ipu_update_channel_buffer() void, because it never fails.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/ipu/ipu_idmac.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 9a5bc1a7389e..e80bae1673fa 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -761,12 +761,10 @@ static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
  * @buffer_n:	buffer number to update.
  *		0 or 1 are the only valid values.
  * @phyaddr:	buffer physical address.
- * @return:	Returns 0 on success or negative error code on failure. This
- *              function will fail if the buffer is set to ready.
  */
 /* Called under spin_lock(_irqsave)(&ichan->lock) */
-static int ipu_update_channel_buffer(struct idmac_channel *ichan,
-				     int buffer_n, dma_addr_t phyaddr)
+static void ipu_update_channel_buffer(struct idmac_channel *ichan,
+				      int buffer_n, dma_addr_t phyaddr)
 {
 	enum ipu_channel channel = ichan->dma_chan.chan_id;
 	uint32_t reg;
@@ -806,8 +804,6 @@ static int ipu_update_channel_buffer(struct idmac_channel *ichan,
 	}
 
 	spin_unlock_irqrestore(&ipu_data.lock, flags);
-
-	return 0;
 }
 
 /* Called under spin_lock_irqsave(&ichan->lock) */
@@ -816,7 +812,6 @@ static int ipu_submit_buffer(struct idmac_channel *ichan,
 {
 	unsigned int chan_id = ichan->dma_chan.chan_id;
 	struct device *dev = &ichan->dma_chan.dev->device;
-	int ret;
 
 	if (async_tx_test_ack(&desc->txd))
 		return -EINTR;
@@ -827,14 +822,7 @@ static int ipu_submit_buffer(struct idmac_channel *ichan,
 	 * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
 	 * doing it again shouldn't hurt either.
 	 */
-	ret = ipu_update_channel_buffer(ichan, buf_idx,
-					sg_dma_address(sg));
-
-	if (ret < 0) {
-		dev_err(dev, "Updating sg %p on channel 0x%x buffer %d failed!\n",
-			sg, chan_id, buf_idx);
-		return ret;
-	}
+	ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg));
 
 	ipu_select_buffer(chan_id, buf_idx);
 	dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
@@ -1379,10 +1367,11 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
 
 	if (likely(sgnew) &&
 	    ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
-		callback = desc->txd.callback;
-		callback_param = desc->txd.callback_param;
+		callback = descnew->txd.callback;
+		callback_param = descnew->txd.callback_param;
 		spin_unlock(&ichan->lock);
-		callback(callback_param);
+		if (callback)
+			callback(callback_param);
 		spin_lock(&ichan->lock);
 	}
 

From 734c2992828c66cee3feb21ecd30a6ac44aecc51 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 6 Feb 2010 09:43:41 +0100
Subject: [PATCH 399/640] drivers/dma: Correct NULL test

cohd_fin has already been verified not to be NULL, so the argument to
BUG_ON cannot be true.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r@
expression *x;
expression e;
identifier l;
@@

if (x == NULL || ...) {
    ... when forall
    return ...; }
... when != goto l;
    when != x = e
    when != &x
*x == NULL
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/coh901318.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index b5f2ee0f8e2c..64a937262a40 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -613,8 +613,6 @@ static void dma_tasklet(unsigned long data)
 	cohd_fin->pending_irqs--;
 	cohc->completed = cohd_fin->desc.cookie;
 
-	BUG_ON(cohc->nbr_active_done && cohd_fin == NULL);
-
 	if (cohc->nbr_active_done == 0)
 		return;
 

From 8523c0480979080e8088e40f25459e5b2d19f621 Mon Sep 17 00:00:00 2001
From: Sean Hefty <sean.hefty@intel.com>
Date: Mon, 8 Feb 2010 16:41:15 -0800
Subject: [PATCH 400/640] RDMA/cm: Revert association of an RDMA device when
 binding to loopback

Revert the following change from commit 6f8372b6 ("RDMA/cm: fix
loopback address support")

   The defined behavior of rdma_bind_addr is to associate an RDMA
   device with an rdma_cm_id, as long as the user specified a non-
   zero address.  (ie they weren't just trying to reserve a port)
   Currently, if the loopback address is passed to rdma_bind_addr,
   no device is associated with the rdma_cm_id.  Fix this.

It turns out that important apps such as Open MPI depend on
rdma_bind_addr() NOT associating any RDMA device when binding to a
loopback address.  Open MPI is being updated to deal with this, but at
least until a new Open MPI release is available, maintain the previous
behavior: allow rdma_bind_addr() to succeed, but do not bind to a
device.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/cma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc9b5940fa97..875e34e0b235 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2115,9 +2115,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	if (ret)
 		goto err1;
 
-	if (cma_loopback_addr(addr)) {
-		ret = cma_bind_loopback(id_priv);
-	} else if (!cma_zero_addr(addr)) {
+	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
 		if (ret)
 			goto err1;

From b91ad0ec52770dcb622b94fc1f57e076686f427a Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Fri, 5 Feb 2010 09:14:17 +0800
Subject: [PATCH 401/640] drm/i915: Rework DPLL calculation parameters for
 Ironlake

Got Ironlake DPLL parameter table, which reflects the hardware
optimized values. So this one trys to list DPLL parameters for
different output types, should potential fix clock issue seen
on new Arrandale CPUs.

This fixes DPLL setting failure on one 1920x1080 dual channel
LVDS for Ironlake. Test has also been made on LVDS panels with
smaller size and CRT/HDMI/DP ports for different monitors on
their all supported modes.

Update:
- Change name of double LVDS to dual LVDS.
- Fix SSC 120M reference clock to use the right range.

Cc: CSJ <changsijay@gmail.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 217 +++++++++++++++++++++------
 1 file changed, 167 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 12775df1bbfd..7e9c835f9ae0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -240,33 +240,86 @@ struct intel_limit {
 #define IRONLAKE_DOT_MAX         350000
 #define IRONLAKE_VCO_MIN         1760000
 #define IRONLAKE_VCO_MAX         3510000
-#define IRONLAKE_N_MIN           1
-#define IRONLAKE_N_MAX           6
-#define IRONLAKE_M_MIN           79
-#define IRONLAKE_M_MAX           127
 #define IRONLAKE_M1_MIN          12
 #define IRONLAKE_M1_MAX          22
 #define IRONLAKE_M2_MIN          5
 #define IRONLAKE_M2_MAX          9
-#define IRONLAKE_P_SDVO_DAC_MIN  5
-#define IRONLAKE_P_SDVO_DAC_MAX  80
-#define IRONLAKE_P_LVDS_MIN      28
-#define IRONLAKE_P_LVDS_MAX      112
-#define IRONLAKE_P1_MIN          1
-#define IRONLAKE_P1_MAX          8
-#define IRONLAKE_P2_SDVO_DAC_SLOW 10
-#define IRONLAKE_P2_SDVO_DAC_FAST 5
-#define IRONLAKE_P2_LVDS_SLOW    14 /* single channel */
-#define IRONLAKE_P2_LVDS_FAST    7  /* double channel */
 #define IRONLAKE_P2_DOT_LIMIT    225000 /* 225Mhz */
 
-#define IRONLAKE_P_DISPLAY_PORT_MIN	10
-#define IRONLAKE_P_DISPLAY_PORT_MAX	20
-#define IRONLAKE_P2_DISPLAY_PORT_FAST	10
-#define IRONLAKE_P2_DISPLAY_PORT_SLOW	10
-#define IRONLAKE_P2_DISPLAY_PORT_LIMIT	0
-#define IRONLAKE_P1_DISPLAY_PORT_MIN	1
-#define IRONLAKE_P1_DISPLAY_PORT_MAX	2
+/* We have parameter ranges for different type of outputs. */
+
+/* DAC & HDMI Refclk 120Mhz */
+#define IRONLAKE_DAC_N_MIN	1
+#define IRONLAKE_DAC_N_MAX	5
+#define IRONLAKE_DAC_M_MIN	79
+#define IRONLAKE_DAC_M_MAX	127
+#define IRONLAKE_DAC_P_MIN	5
+#define IRONLAKE_DAC_P_MAX	80
+#define IRONLAKE_DAC_P1_MIN	1
+#define IRONLAKE_DAC_P1_MAX	8
+#define IRONLAKE_DAC_P2_SLOW	10
+#define IRONLAKE_DAC_P2_FAST	5
+
+/* LVDS single-channel 120Mhz refclk */
+#define IRONLAKE_LVDS_S_N_MIN	1
+#define IRONLAKE_LVDS_S_N_MAX	3
+#define IRONLAKE_LVDS_S_M_MIN	79
+#define IRONLAKE_LVDS_S_M_MAX	118
+#define IRONLAKE_LVDS_S_P_MIN	28
+#define IRONLAKE_LVDS_S_P_MAX	112
+#define IRONLAKE_LVDS_S_P1_MIN	2
+#define IRONLAKE_LVDS_S_P1_MAX	8
+#define IRONLAKE_LVDS_S_P2_SLOW	14
+#define IRONLAKE_LVDS_S_P2_FAST	14
+
+/* LVDS dual-channel 120Mhz refclk */
+#define IRONLAKE_LVDS_D_N_MIN	1
+#define IRONLAKE_LVDS_D_N_MAX	3
+#define IRONLAKE_LVDS_D_M_MIN	79
+#define IRONLAKE_LVDS_D_M_MAX	127
+#define IRONLAKE_LVDS_D_P_MIN	14
+#define IRONLAKE_LVDS_D_P_MAX	56
+#define IRONLAKE_LVDS_D_P1_MIN	2
+#define IRONLAKE_LVDS_D_P1_MAX	8
+#define IRONLAKE_LVDS_D_P2_SLOW	7
+#define IRONLAKE_LVDS_D_P2_FAST	7
+
+/* LVDS single-channel 100Mhz refclk */
+#define IRONLAKE_LVDS_S_SSC_N_MIN	1
+#define IRONLAKE_LVDS_S_SSC_N_MAX	2
+#define IRONLAKE_LVDS_S_SSC_M_MIN	79
+#define IRONLAKE_LVDS_S_SSC_M_MAX	126
+#define IRONLAKE_LVDS_S_SSC_P_MIN	28
+#define IRONLAKE_LVDS_S_SSC_P_MAX	112
+#define IRONLAKE_LVDS_S_SSC_P1_MIN	2
+#define IRONLAKE_LVDS_S_SSC_P1_MAX	8
+#define IRONLAKE_LVDS_S_SSC_P2_SLOW	14
+#define IRONLAKE_LVDS_S_SSC_P2_FAST	14
+
+/* LVDS dual-channel 100Mhz refclk */
+#define IRONLAKE_LVDS_D_SSC_N_MIN	1
+#define IRONLAKE_LVDS_D_SSC_N_MAX	3
+#define IRONLAKE_LVDS_D_SSC_M_MIN	79
+#define IRONLAKE_LVDS_D_SSC_M_MAX	126
+#define IRONLAKE_LVDS_D_SSC_P_MIN	14
+#define IRONLAKE_LVDS_D_SSC_P_MAX	42
+#define IRONLAKE_LVDS_D_SSC_P1_MIN	2
+#define IRONLAKE_LVDS_D_SSC_P1_MAX	6
+#define IRONLAKE_LVDS_D_SSC_P2_SLOW	7
+#define IRONLAKE_LVDS_D_SSC_P2_FAST	7
+
+/* DisplayPort */
+#define IRONLAKE_DP_N_MIN		1
+#define IRONLAKE_DP_N_MAX		2
+#define IRONLAKE_DP_M_MIN		81
+#define IRONLAKE_DP_M_MAX		90
+#define IRONLAKE_DP_P_MIN		10
+#define IRONLAKE_DP_P_MAX		20
+#define IRONLAKE_DP_P2_FAST		10
+#define IRONLAKE_DP_P2_SLOW		10
+#define IRONLAKE_DP_P2_LIMIT		0
+#define IRONLAKE_DP_P1_MIN		1
+#define IRONLAKE_DP_P1_MAX		2
 
 static bool
 intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
@@ -474,33 +527,78 @@ static const intel_limit_t intel_limits_pineview_lvds = {
 	.find_pll = intel_find_best_PLL,
 };
 
-static const intel_limit_t intel_limits_ironlake_sdvo = {
+static const intel_limit_t intel_limits_ironlake_dac = {
 	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
 	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_N_MIN,            .max = IRONLAKE_N_MAX },
-	.m   = { .min = IRONLAKE_M_MIN,            .max = IRONLAKE_M_MAX },
+	.n   = { .min = IRONLAKE_DAC_N_MIN,        .max = IRONLAKE_DAC_N_MAX },
+	.m   = { .min = IRONLAKE_DAC_M_MIN,        .max = IRONLAKE_DAC_M_MAX },
 	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
 	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_P_SDVO_DAC_MIN,   .max = IRONLAKE_P_SDVO_DAC_MAX },
-	.p1  = { .min = IRONLAKE_P1_MIN,           .max = IRONLAKE_P1_MAX },
+	.p   = { .min = IRONLAKE_DAC_P_MIN,	   .max = IRONLAKE_DAC_P_MAX },
+	.p1  = { .min = IRONLAKE_DAC_P1_MIN,       .max = IRONLAKE_DAC_P1_MAX },
 	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_P2_SDVO_DAC_SLOW,
-		 .p2_fast = IRONLAKE_P2_SDVO_DAC_FAST },
+		 .p2_slow = IRONLAKE_DAC_P2_SLOW,
+		 .p2_fast = IRONLAKE_DAC_P2_FAST },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
-static const intel_limit_t intel_limits_ironlake_lvds = {
+static const intel_limit_t intel_limits_ironlake_single_lvds = {
 	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
 	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_N_MIN,            .max = IRONLAKE_N_MAX },
-	.m   = { .min = IRONLAKE_M_MIN,            .max = IRONLAKE_M_MAX },
+	.n   = { .min = IRONLAKE_LVDS_S_N_MIN,     .max = IRONLAKE_LVDS_S_N_MAX },
+	.m   = { .min = IRONLAKE_LVDS_S_M_MIN,     .max = IRONLAKE_LVDS_S_M_MAX },
 	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
 	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_P_LVDS_MIN,       .max = IRONLAKE_P_LVDS_MAX },
-	.p1  = { .min = IRONLAKE_P1_MIN,           .max = IRONLAKE_P1_MAX },
+	.p   = { .min = IRONLAKE_LVDS_S_P_MIN,     .max = IRONLAKE_LVDS_S_P_MAX },
+	.p1  = { .min = IRONLAKE_LVDS_S_P1_MIN,    .max = IRONLAKE_LVDS_S_P1_MAX },
 	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_P2_LVDS_SLOW,
-		 .p2_fast = IRONLAKE_P2_LVDS_FAST },
+		 .p2_slow = IRONLAKE_LVDS_S_P2_SLOW,
+		 .p2_fast = IRONLAKE_LVDS_S_P2_FAST },
+	.find_pll = intel_g4x_find_best_PLL,
+};
+
+static const intel_limit_t intel_limits_ironlake_dual_lvds = {
+	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
+	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
+	.n   = { .min = IRONLAKE_LVDS_D_N_MIN,     .max = IRONLAKE_LVDS_D_N_MAX },
+	.m   = { .min = IRONLAKE_LVDS_D_M_MIN,     .max = IRONLAKE_LVDS_D_M_MAX },
+	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
+	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
+	.p   = { .min = IRONLAKE_LVDS_D_P_MIN,     .max = IRONLAKE_LVDS_D_P_MAX },
+	.p1  = { .min = IRONLAKE_LVDS_D_P1_MIN,    .max = IRONLAKE_LVDS_D_P1_MAX },
+	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
+		 .p2_slow = IRONLAKE_LVDS_D_P2_SLOW,
+		 .p2_fast = IRONLAKE_LVDS_D_P2_FAST },
+	.find_pll = intel_g4x_find_best_PLL,
+};
+
+static const intel_limit_t intel_limits_ironlake_single_lvds_100m = {
+	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
+	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
+	.n   = { .min = IRONLAKE_LVDS_S_SSC_N_MIN, .max = IRONLAKE_LVDS_S_SSC_N_MAX },
+	.m   = { .min = IRONLAKE_LVDS_S_SSC_M_MIN, .max = IRONLAKE_LVDS_S_SSC_M_MAX },
+	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
+	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
+	.p   = { .min = IRONLAKE_LVDS_S_SSC_P_MIN, .max = IRONLAKE_LVDS_S_SSC_P_MAX },
+	.p1  = { .min = IRONLAKE_LVDS_S_SSC_P1_MIN,.max = IRONLAKE_LVDS_S_SSC_P1_MAX },
+	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
+		 .p2_slow = IRONLAKE_LVDS_S_SSC_P2_SLOW,
+		 .p2_fast = IRONLAKE_LVDS_S_SSC_P2_FAST },
+	.find_pll = intel_g4x_find_best_PLL,
+};
+
+static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = {
+	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
+	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
+	.n   = { .min = IRONLAKE_LVDS_D_SSC_N_MIN, .max = IRONLAKE_LVDS_D_SSC_N_MAX },
+	.m   = { .min = IRONLAKE_LVDS_D_SSC_M_MIN, .max = IRONLAKE_LVDS_D_SSC_M_MAX },
+	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
+	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
+	.p   = { .min = IRONLAKE_LVDS_D_SSC_P_MIN, .max = IRONLAKE_LVDS_D_SSC_P_MAX },
+	.p1  = { .min = IRONLAKE_LVDS_D_SSC_P1_MIN,.max = IRONLAKE_LVDS_D_SSC_P1_MAX },
+	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
+		 .p2_slow = IRONLAKE_LVDS_D_SSC_P2_SLOW,
+		 .p2_fast = IRONLAKE_LVDS_D_SSC_P2_FAST },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
@@ -509,34 +607,53 @@ static const intel_limit_t intel_limits_ironlake_display_port = {
                  .max = IRONLAKE_DOT_MAX },
         .vco = { .min = IRONLAKE_VCO_MIN,
                  .max = IRONLAKE_VCO_MAX},
-        .n   = { .min = IRONLAKE_N_MIN,
-                 .max = IRONLAKE_N_MAX },
-        .m   = { .min = IRONLAKE_M_MIN,
-                 .max = IRONLAKE_M_MAX },
+        .n   = { .min = IRONLAKE_DP_N_MIN,
+                 .max = IRONLAKE_DP_N_MAX },
+        .m   = { .min = IRONLAKE_DP_M_MIN,
+                 .max = IRONLAKE_DP_M_MAX },
         .m1  = { .min = IRONLAKE_M1_MIN,
                  .max = IRONLAKE_M1_MAX },
         .m2  = { .min = IRONLAKE_M2_MIN,
                  .max = IRONLAKE_M2_MAX },
-        .p   = { .min = IRONLAKE_P_DISPLAY_PORT_MIN,
-                 .max = IRONLAKE_P_DISPLAY_PORT_MAX },
-        .p1  = { .min = IRONLAKE_P1_DISPLAY_PORT_MIN,
-                 .max = IRONLAKE_P1_DISPLAY_PORT_MAX},
-        .p2  = { .dot_limit = IRONLAKE_P2_DISPLAY_PORT_LIMIT,
-                 .p2_slow = IRONLAKE_P2_DISPLAY_PORT_SLOW,
-                 .p2_fast = IRONLAKE_P2_DISPLAY_PORT_FAST },
+        .p   = { .min = IRONLAKE_DP_P_MIN,
+                 .max = IRONLAKE_DP_P_MAX },
+        .p1  = { .min = IRONLAKE_DP_P1_MIN,
+                 .max = IRONLAKE_DP_P1_MAX},
+        .p2  = { .dot_limit = IRONLAKE_DP_P2_LIMIT,
+                 .p2_slow = IRONLAKE_DP_P2_SLOW,
+                 .p2_fast = IRONLAKE_DP_P2_FAST },
         .find_pll = intel_find_pll_ironlake_dp,
 };
 
 static const intel_limit_t *intel_ironlake_limit(struct drm_crtc *crtc)
 {
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	const intel_limit_t *limit;
-	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
-		limit = &intel_limits_ironlake_lvds;
-	else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
+	int refclk = 120;
+
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+		if (dev_priv->lvds_use_ssc && dev_priv->lvds_ssc_freq == 100)
+			refclk = 100;
+
+		if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
+		    LVDS_CLKB_POWER_UP) {
+			/* LVDS dual channel */
+			if (refclk == 100)
+				limit = &intel_limits_ironlake_dual_lvds_100m;
+			else
+				limit = &intel_limits_ironlake_dual_lvds;
+		} else {
+			if (refclk == 100)
+				limit = &intel_limits_ironlake_single_lvds_100m;
+			else
+				limit = &intel_limits_ironlake_single_lvds;
+		}
+	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
 			HAS_eDP)
 		limit = &intel_limits_ironlake_display_port;
 	else
-		limit = &intel_limits_ironlake_sdvo;
+		limit = &intel_limits_ironlake_dac;
 
 	return limit;
 }

From f4fc580bec5fb76560329c8c537b9b71d8d032b6 Mon Sep 17 00:00:00 2001
From: Wu Zhangjin <wuzhangjin@gmail.com>
Date: Mon, 1 Feb 2010 17:10:55 +0800
Subject: [PATCH 402/640] MIPS: Fixup of the r4k timer

As reported by Maxime Bizon, the commit "MIPS: PowerTV: Fix support for
timer interrupts with > 64 external IRQs" have broken the r4k timer
since it didn't initialize the cp0_compare_irq_shift variable used in
c0_compare_int_pending() on the architectures whose cpu_has_mips_r2 is
false.

This patch fixes it via initializing the cp0_compare_irq_shift as the
cp0_compare_irq used in the old c0_compare_int_pending().

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
Cc: David VomLehn <dvomlehn@cisco.com>
Cc: mbizon@freebox.fr
Cc: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/922/
Tested-by: Shane McDonald <mcdonald.shane@gmail.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/traps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 338dfe8ed002..31b204b26ba0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1501,6 +1501,7 @@ void __cpuinit per_cpu_trap_init(void)
 			cp0_perfcount_irq = -1;
 	} else {
 		cp0_compare_irq = CP0_LEGACY_COMPARE_IRQ;
+		cp0_compare_irq_shift = cp0_compare_irq;
 		cp0_perfcount_irq = -1;
 	}
 

From 59d302b342e5d451c4448479e82e1105864a3112 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 Feb 2010 19:16:34 +0100
Subject: [PATCH 403/640] MIPS: IP27: Make defconfig useful again.

RTC support was rewritten but the defconfig files were not updated.  Enable
IPv6 support which for some folks already is a must have.  Assign useful
values to other new options.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/ip27_defconfig | 917 ++++++++++++++++++++++++-------
 1 file changed, 729 insertions(+), 188 deletions(-)

diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index ed84b4cb3c8d..84b6503f10b9 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.23-rc2
-# Tue Aug  7 13:04:24 2007
+# Linux kernel version: 2.6.33-rc6
+# Wed Feb  3 18:12:31 2010
 #
 CONFIG_MIPS=y
 
@@ -9,20 +9,28 @@ CONFIG_MIPS=y
 # Machine selection
 #
 # CONFIG_MACH_ALCHEMY is not set
+# CONFIG_AR7 is not set
+# CONFIG_BCM47XX is not set
+# CONFIG_BCM63XX is not set
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MACH_JAZZ is not set
-# CONFIG_LEMOTE_FULONG is not set
+# CONFIG_LASAT is not set
+# CONFIG_MACH_LOONGSON is not set
 # CONFIG_MIPS_MALTA is not set
 # CONFIG_MIPS_SIM is not set
-# CONFIG_MARKEINS is not set
+# CONFIG_NEC_MARKEINS is not set
 # CONFIG_MACH_VR41XX is not set
+# CONFIG_NXP_STB220 is not set
+# CONFIG_NXP_STB225 is not set
 # CONFIG_PNX8550_JBS is not set
 # CONFIG_PNX8550_STB810 is not set
 # CONFIG_PMC_MSP is not set
 # CONFIG_PMC_YOSEMITE is not set
+# CONFIG_POWERTV is not set
 # CONFIG_SGI_IP22 is not set
 CONFIG_SGI_IP27=y
+# CONFIG_SGI_IP28 is not set
 # CONFIG_SGI_IP32 is not set
 # CONFIG_SIBYTE_CRHINE is not set
 # CONFIG_SIBYTE_CARMEL is not set
@@ -33,32 +41,39 @@ CONFIG_SGI_IP27=y
 # CONFIG_SIBYTE_SENTOSA is not set
 # CONFIG_SIBYTE_BIGSUR is not set
 # CONFIG_SNI_RM is not set
-# CONFIG_TOSHIBA_JMR3927 is not set
-# CONFIG_TOSHIBA_RBTX4927 is not set
-# CONFIG_TOSHIBA_RBTX4938 is not set
+# CONFIG_MACH_TX39XX is not set
+# CONFIG_MACH_TX49XX is not set
+# CONFIG_MIKROTIK_RB532 is not set
 # CONFIG_WR_PPMC is not set
+# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set
+# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set
+# CONFIG_ALCHEMY_GPIO_INDIRECT is not set
 CONFIG_SGI_SN_M_MODE=y
 # CONFIG_SGI_SN_N_MODE is not set
 # CONFIG_MAPPED_KERNEL is not set
 # CONFIG_REPLICATE_KTEXT is not set
 # CONFIG_REPLICATE_EXHANDLERS is not set
+CONFIG_LOONGSON_UART_BASE=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_ARCH_SUPPORTS_OPROFILE=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_TIME=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_ARC=y
 CONFIG_DMA_COHERENT=y
-CONFIG_EARLY_PRINTK=y
 CONFIG_SYS_HAS_EARLY_PRINTK=y
 # CONFIG_NO_IOPORT is not set
 CONFIG_CPU_BIG_ENDIAN=y
 # CONFIG_CPU_LITTLE_ENDIAN is not set
 CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y
+CONFIG_DEFAULT_SGI_PARTITION=y
 CONFIG_MIPS_L1_CACHE_SHIFT=7
 CONFIG_ARC64=y
 CONFIG_BOOT_ELF64=y
@@ -66,7 +81,8 @@ CONFIG_BOOT_ELF64=y
 #
 # CPU selection
 #
-# CONFIG_CPU_LOONGSON2 is not set
+# CONFIG_CPU_LOONGSON2E is not set
+# CONFIG_CPU_LOONGSON2F is not set
 # CONFIG_CPU_MIPS32_R1 is not set
 # CONFIG_CPU_MIPS32_R2 is not set
 # CONFIG_CPU_MIPS64_R1 is not set
@@ -79,6 +95,7 @@ CONFIG_BOOT_ELF64=y
 # CONFIG_CPU_TX49XX is not set
 # CONFIG_CPU_R5000 is not set
 # CONFIG_CPU_R5432 is not set
+# CONFIG_CPU_R5500 is not set
 # CONFIG_CPU_R6000 is not set
 # CONFIG_CPU_NEVADA is not set
 # CONFIG_CPU_R8000 is not set
@@ -86,6 +103,7 @@ CONFIG_CPU_R10000=y
 # CONFIG_CPU_RM7000 is not set
 # CONFIG_CPU_RM9000 is not set
 # CONFIG_CPU_SB1 is not set
+# CONFIG_CPU_CAVIUM_OCTEON is not set
 CONFIG_SYS_HAS_CPU_R10000=y
 CONFIG_SYS_SUPPORTS_64BIT_KERNEL=y
 CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
@@ -99,6 +117,7 @@ CONFIG_64BIT=y
 CONFIG_PAGE_SIZE_4KB=y
 # CONFIG_PAGE_SIZE_8KB is not set
 # CONFIG_PAGE_SIZE_16KB is not set
+# CONFIG_PAGE_SIZE_32KB is not set
 # CONFIG_PAGE_SIZE_64KB is not set
 CONFIG_CPU_HAS_PREFETCH=y
 CONFIG_MIPS_MT_DISABLED=y
@@ -110,6 +129,7 @@ CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_IRQ_PER_CPU=y
 CONFIG_CPU_SUPPORTS_HIGHMEM=y
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_NUMA=y
 CONFIG_SYS_SUPPORTS_NUMA=y
 CONFIG_NODES_SHIFT=6
@@ -120,16 +140,22 @@ CONFIG_DISCONTIGMEM_MANUAL=y
 CONFIG_DISCONTIGMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_NEED_MULTIPLE_NODES=y
-# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_MIGRATION=y
-CONFIG_RESOURCES_64BIT=y
+CONFIG_PHYS_ADDR_T_64BIT=y
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
+# CONFIG_KSM is not set
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
 CONFIG_SMP=y
 CONFIG_SYS_SUPPORTS_SMP=y
 CONFIG_NR_CPUS_DEFAULT_64=y
 CONFIG_NR_CPUS=64
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 # CONFIG_HZ_48 is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_128 is not set
@@ -142,13 +168,13 @@ CONFIG_HZ=1000
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
-CONFIG_PREEMPT_BKL=y
 # CONFIG_MIPS_INSANE_LARGE is not set
 # CONFIG_KEXEC is not set
 CONFIG_SECCOMP=y
 CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_CONSTRUCTORS=y
 
 #
 # General setup
@@ -162,20 +188,41 @@ CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_USER_NS is not set
 # CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_TINY_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=64
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+# CONFIG_GROUP_SCHED is not set
 CONFIG_CGROUPS=y
+# CONFIG_CGROUP_DEBUG is not set
+# CONFIG_CGROUP_NS is not set
+# CONFIG_CGROUP_FREEZER is not set
+# CONFIG_CGROUP_DEVICE is not set
 CONFIG_CPUSETS=y
-CONFIG_SYSFS_DEPRECATED=y
+CONFIG_PROC_PID_CPUSET=y
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
+# CONFIG_SYSFS_DEPRECATED_V2 is not set
 CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
 # CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 CONFIG_EMBEDDED=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
@@ -184,44 +231,92 @@ CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
+# CONFIG_PCSPKR_PLATFORM is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
+CONFIG_AIO=y
+
+#
+# Kernel Performance Events And Counters
+#
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_PCI_QUIRKS=y
+CONFIG_COMPAT_BRK=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+CONFIG_HAVE_OPROFILE=y
+CONFIG_HAVE_SYSCALL_WRAPPERS=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+
+#
+# GCOV-based kernel profiling
+#
+CONFIG_SLOW_WORK=y
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 # CONFIG_MODVERSIONS is not set
 CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+# CONFIG_BLK_CGROUP is not set
+CONFIG_BLOCK_COMPAT=y
 
 #
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-CONFIG_DEFAULT_AS=y
+# CONFIG_CFQ_GROUP_IOSCHED is not set
 # CONFIG_DEFAULT_DEADLINE is not set
-# CONFIG_DEFAULT_CFQ is not set
+CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_INLINE_SPIN_TRYLOCK is not set
+# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK is not set
+# CONFIG_INLINE_SPIN_LOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
+CONFIG_INLINE_SPIN_UNLOCK=y
+# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
+CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
+# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_READ_TRYLOCK is not set
+# CONFIG_INLINE_READ_LOCK is not set
+# CONFIG_INLINE_READ_LOCK_BH is not set
+# CONFIG_INLINE_READ_LOCK_IRQ is not set
+# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
+CONFIG_INLINE_READ_UNLOCK=y
+# CONFIG_INLINE_READ_UNLOCK_BH is not set
+CONFIG_INLINE_READ_UNLOCK_IRQ=y
+# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_WRITE_TRYLOCK is not set
+# CONFIG_INLINE_WRITE_LOCK is not set
+# CONFIG_INLINE_WRITE_LOCK_BH is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
+CONFIG_INLINE_WRITE_UNLOCK=y
+# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
+CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
+# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
+CONFIG_MUTEX_SPIN_ON_OWNER=y
+# CONFIG_FREEZER is not set
 
 #
 # Bus options (PCI, PCMCIA, EISA, ISA, TC)
@@ -230,11 +325,10 @@ CONFIG_HW_HAS_PCI=y
 CONFIG_PCI=y
 CONFIG_PCI_DOMAINS=y
 # CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCI_LEGACY is not set
+# CONFIG_PCI_STUB is not set
+# CONFIG_PCI_IOV is not set
 CONFIG_MMU=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
 # CONFIG_PCCARD is not set
 # CONFIG_HOTPLUG_PCI is not set
 
@@ -242,8 +336,9 @@ CONFIG_MMU=y
 # Executable file formats
 #
 CONFIG_BINFMT_ELF=y
+CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
+# CONFIG_HAVE_AOUT is not set
 # CONFIG_BINFMT_MISC is not set
-# CONFIG_BUILD_ELF64 is not set
 CONFIG_MIPS32_COMPAT=y
 CONFIG_COMPAT=y
 CONFIG_SYSVIPC_COMPAT=y
@@ -255,13 +350,10 @@ CONFIG_BINFMT_ELF32=y
 # Power management options
 #
 CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
 # CONFIG_PM_DEBUG is not set
-
-#
-# Networking
-#
+# CONFIG_PM_RUNTIME is not set
 CONFIG_NET=y
+CONFIG_COMPAT_NETLINK_MESSAGES=y
 
 #
 # Networking options
@@ -273,6 +365,8 @@ CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
 # CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_XFRM_MIGRATE=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_XFRM_IPCOMP=m
 CONFIG_NET_KEY=y
 CONFIG_NET_KEY_MIGRATE=y
 CONFIG_INET=y
@@ -292,19 +386,40 @@ CONFIG_IP_PNP=y
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_TUNNEL=m
 CONFIG_INET_XFRM_MODE_TRANSPORT=m
 CONFIG_INET_XFRM_MODE_TUNNEL=m
 CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_LRO=y
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
 CONFIG_TCP_MD5SIG=y
-# CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_TUNNEL=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_SIT_6RD=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_PIMSM_V2=y
 CONFIG_NETWORK_SECMARK=y
 # CONFIG_NETFILTER is not set
 # CONFIG_IP_DCCP is not set
@@ -314,9 +429,11 @@ CONFIG_IP_SCTP=m
 # CONFIG_SCTP_HMAC_NONE is not set
 # CONFIG_SCTP_HMAC_SHA1 is not set
 CONFIG_SCTP_HMAC_MD5=y
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
 # CONFIG_DECNET is not set
 # CONFIG_LLC2 is not set
@@ -326,12 +443,9 @@ CONFIG_SCTP_HMAC_MD5=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
+# CONFIG_PHONET is not set
+# CONFIG_IEEE802154 is not set
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_FIFO=y
 
 #
 # Queueing/Scheduling
@@ -340,7 +454,7 @@ CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RR=m
+CONFIG_NET_SCH_MULTIQ=y
 CONFIG_NET_SCH_RED=m
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
@@ -348,6 +462,7 @@ CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SCH_GRED=m
 CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
+# CONFIG_NET_SCH_DRR is not set
 CONFIG_NET_SCH_INGRESS=m
 
 #
@@ -364,41 +479,63 @@ CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_MARK=y
 CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
 # CONFIG_NET_EMATCH is not set
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
 CONFIG_NET_ACT_GACT=m
 CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 # CONFIG_NET_ACT_SIMP is not set
-CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_ACT_SKBEDIT=m
 # CONFIG_NET_CLS_IND is not set
+CONFIG_NET_SCH_FIFO=y
+# CONFIG_DCB is not set
 
 #
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
-CONFIG_CFG80211=m
+CONFIG_FIB_RULES=y
+CONFIG_WIRELESS=y
 CONFIG_WIRELESS_EXT=y
+CONFIG_WEXT_CORE=y
+CONFIG_WEXT_PROC=y
+CONFIG_WEXT_SPY=y
+CONFIG_WEXT_PRIV=y
+CONFIG_CFG80211=m
+# CONFIG_NL80211_TESTMODE is not set
+# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+# CONFIG_CFG80211_REG_DEBUG is not set
+CONFIG_CFG80211_DEFAULT_PS=y
+# CONFIG_WIRELESS_OLD_REGULATORY is not set
+CONFIG_CFG80211_WEXT=y
+CONFIG_WIRELESS_EXT_SYSFS=y
+CONFIG_LIB80211=m
+CONFIG_LIB80211_CRYPT_WEP=m
+CONFIG_LIB80211_CRYPT_CCMP=m
+CONFIG_LIB80211_CRYPT_TKIP=m
+# CONFIG_LIB80211_DEBUG is not set
 CONFIG_MAC80211=m
-# CONFIG_MAC80211_DEBUG is not set
-CONFIG_IEEE80211=m
-# CONFIG_IEEE80211_DEBUG is not set
-CONFIG_IEEE80211_CRYPT_WEP=m
-CONFIG_IEEE80211_CRYPT_CCMP=m
-CONFIG_IEEE80211_CRYPT_TKIP=m
-CONFIG_IEEE80211_SOFTMAC=m
-# CONFIG_IEEE80211_SOFTMAC_DEBUG is not set
+CONFIG_MAC80211_RC_PID=y
+CONFIG_MAC80211_RC_MINSTREL=y
+# CONFIG_MAC80211_RC_DEFAULT_PID is not set
+CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
+CONFIG_MAC80211_RC_DEFAULT="minstrel"
+# CONFIG_MAC80211_MESH is not set
+CONFIG_MAC80211_LEDS=y
+# CONFIG_MAC80211_DEBUG_MENU is not set
+# CONFIG_WIMAX is not set
 CONFIG_RFKILL=m
+CONFIG_RFKILL_LEDS=y
 # CONFIG_NET_9P is not set
 
 #
@@ -408,9 +545,13 @@ CONFIG_RFKILL=m
 #
 # Generic Driver Options
 #
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_DEVTMPFS is not set
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
 # CONFIG_SYS_HYPERVISOR is not set
 CONFIG_CONNECTOR=m
 # CONFIG_MTD is not set
@@ -423,14 +564,19 @@ CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+# CONFIG_BLK_DEV_DRBD is not set
 # CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_OSD=m
 # CONFIG_BLK_DEV_SX8 is not set
 # CONFIG_BLK_DEV_RAM is not set
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_CDROM_PKTCDVD_BUFFERS=8
 # CONFIG_CDROM_PKTCDVD_WCACHE is not set
 CONFIG_ATA_OVER_ETH=m
+# CONFIG_BLK_DEV_HD is not set
 # CONFIG_MISC_DEVICES is not set
+CONFIG_EEPROM_93CX6=m
+CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
 #
@@ -453,10 +599,6 @@ CONFIG_BLK_DEV_SR=m
 CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=m
 CONFIG_CHR_DEV_SCH=m
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
 # CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
@@ -471,11 +613,18 @@ CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_ISCSI_ATTRS=m
 CONFIG_SCSI_SAS_ATTRS=m
 CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SAS_HOST_SMP=y
 # CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
 # CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+CONFIG_SCSI_HPSA=m
 # CONFIG_SCSI_3W_9XXX is not set
+CONFIG_SCSI_3W_SAS=m
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_AACRAID is not set
 # CONFIG_SCSI_AIC7XXX is not set
@@ -483,11 +632,21 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_AIC79XX is not set
 CONFIG_SCSI_AIC94XX=m
 # CONFIG_AIC94XX_DEBUG is not set
+CONFIG_SCSI_MVSAS=m
+# CONFIG_SCSI_MVSAS_DEBUG is not set
+CONFIG_SCSI_DPT_I2O=m
+# CONFIG_SCSI_ADVANSYS is not set
 # CONFIG_SCSI_ARCMSR is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_MPT2SAS_MAX_SGE=128
+# CONFIG_SCSI_MPT2SAS_LOGGING is not set
 # CONFIG_SCSI_HPTIOP is not set
+CONFIG_LIBFC=m
+# CONFIG_LIBFCOE is not set
+# CONFIG_FCOE is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_IPS is not set
@@ -502,16 +661,31 @@ CONFIG_SCSI_QLOGIC_1280=y
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC390T is not set
 # CONFIG_SCSI_DEBUG is not set
+CONFIG_SCSI_PMCRAID=m
+# CONFIG_SCSI_PM8001 is not set
 # CONFIG_SCSI_SRP is not set
+CONFIG_SCSI_BFA_FC=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_SCSI_OSD_DPRINT_SENSE=1
+# CONFIG_SCSI_OSD_DEBUG is not set
 # CONFIG_ATA is not set
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
+CONFIG_MD_AUTODETECT=y
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=y
 CONFIG_MD_RAID1=y
 CONFIG_MD_RAID10=m
 CONFIG_MD_RAID456=y
-CONFIG_MD_RAID5_RESHAPE=y
+# CONFIG_MULTICORE_RAID456 is not set
+CONFIG_MD_RAID6_PQ=y
+# CONFIG_ASYNC_RAID6_TEST is not set
 CONFIG_MD_MULTIPATH=m
 CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
@@ -519,36 +693,39 @@ CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_EMC=m
-CONFIG_DM_MULTIPATH_RDAC=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
 # CONFIG_DM_DELAY is not set
-
-#
-# Fusion MPT device support
-#
+CONFIG_DM_UEVENT=y
 # CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
-# CONFIG_FUSION_FC is not set
-# CONFIG_FUSION_SAS is not set
 
 #
 # IEEE 1394 (FireWire) support
 #
+
+#
+# You can enable one or both FireWire driver stacks.
+#
+
+#
+# The newer stack is recommended.
+#
 # CONFIG_FIREWIRE is not set
 # CONFIG_IEEE1394 is not set
 # CONFIG_I2O is not set
 CONFIG_NETDEVICES=y
-CONFIG_NETDEVICES_MULTIQUEUE=y
 CONFIG_IFB=m
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 CONFIG_MACVLAN=m
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
+CONFIG_VETH=m
 # CONFIG_ARCNET is not set
-CONFIG_PHYLIB=m
+CONFIG_PHYLIB=y
 
 #
 # MII PHY device drivers
@@ -562,23 +739,51 @@ CONFIG_VITESSE_PHY=m
 CONFIG_SMSC_PHY=m
 # CONFIG_BROADCOM_PHY is not set
 CONFIG_ICPLUS_PHY=m
+CONFIG_REALTEK_PHY=m
+CONFIG_NATIONAL_PHY=m
+CONFIG_STE10XP=m
+CONFIG_LSI_ET1011C_PHY=m
 # CONFIG_FIXED_PHY is not set
+CONFIG_MDIO_BITBANG=m
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 CONFIG_AX88796=m
+CONFIG_AX88796_93CX6=y
 CONFIG_SGI_IOC3_ETH=y
 # CONFIG_HAPPYMEAL is not set
 # CONFIG_SUNGEM is not set
 # CONFIG_CASSINI is not set
 # CONFIG_NET_VENDOR_3COM is not set
+CONFIG_SMC91X=m
 # CONFIG_DM9000 is not set
+CONFIG_ETHOC=m
+CONFIG_SMSC911X=m
+CONFIG_DNET=m
 # CONFIG_NET_TULIP is not set
 # CONFIG_HP100 is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_NET_PCI is not set
+CONFIG_B44=m
+CONFIG_B44_PCI_AUTOSELECT=y
+CONFIG_B44_PCICORE_AUTOSELECT=y
+CONFIG_B44_PCI=y
+CONFIG_KS8842=m
+CONFIG_KS8851_MLL=m
+CONFIG_ATL2=m
 CONFIG_NETDEV_1000=y
 # CONFIG_ACENIC is not set
 # CONFIG_DL2K is not set
 # CONFIG_E1000 is not set
+CONFIG_E1000E=m
+CONFIG_IP1000=m
+CONFIG_IGB=m
+CONFIG_IGBVF=m
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -588,24 +793,75 @@ CONFIG_NETDEV_1000=y
 # CONFIG_SKY2 is not set
 CONFIG_VIA_VELOCITY=m
 # CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
+CONFIG_BNX2=m
+CONFIG_CNIC=m
 CONFIG_QLA3XXX=m
 # CONFIG_ATL1 is not set
+CONFIG_ATL1E=m
+CONFIG_ATL1C=m
+CONFIG_JME=m
 CONFIG_NETDEV_10000=y
+CONFIG_MDIO=m
 # CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
 CONFIG_CHELSIO_T3=m
+CONFIG_ENIC=m
+CONFIG_IXGBE=m
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+CONFIG_VXGE=m
+# CONFIG_VXGE_DEBUG_TRACE_ALL is not set
 # CONFIG_MYRI10GE is not set
 CONFIG_NETXEN_NIC=m
-# CONFIG_MLX4_CORE is not set
+CONFIG_NIU=m
+CONFIG_MLX4_EN=m
+CONFIG_MLX4_CORE=m
+# CONFIG_MLX4_DEBUG is not set
+CONFIG_TEHUTI=m
+CONFIG_BNX2X=m
+CONFIG_QLGE=m
+CONFIG_SFC=m
+CONFIG_BE2NET=m
 # CONFIG_TR is not set
-
-#
-# Wireless LAN
-#
-# CONFIG_WLAN_PRE80211 is not set
-CONFIG_WLAN_80211=y
+CONFIG_WLAN=y
+CONFIG_LIBERTAS_THINFIRM=m
+CONFIG_ATMEL=m
+CONFIG_PCI_ATMEL=m
+CONFIG_PRISM54=m
+CONFIG_RTL8180=m
+CONFIG_ADM8211=m
+# CONFIG_MAC80211_HWSIM is not set
+CONFIG_MWL8K=m
+CONFIG_ATH_COMMON=m
+# CONFIG_ATH_DEBUG is not set
+CONFIG_ATH5K=m
+# CONFIG_ATH5K_DEBUG is not set
+CONFIG_ATH9K_HW=m
+CONFIG_ATH9K_COMMON=m
+CONFIG_ATH9K=m
+CONFIG_B43=m
+CONFIG_B43_PCI_AUTOSELECT=y
+CONFIG_B43_PCICORE_AUTOSELECT=y
+CONFIG_B43_PHY_LP=y
+CONFIG_B43_LEDS=y
+CONFIG_B43_HWRNG=y
+# CONFIG_B43_DEBUG is not set
+CONFIG_B43LEGACY=m
+CONFIG_B43LEGACY_PCI_AUTOSELECT=y
+CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y
+CONFIG_B43LEGACY_LEDS=y
+CONFIG_B43LEGACY_HWRNG=y
+# CONFIG_B43LEGACY_DEBUG is not set
+CONFIG_B43LEGACY_DMA=y
+CONFIG_B43LEGACY_PIO=y
+CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
+# CONFIG_B43LEGACY_DMA_MODE is not set
+# CONFIG_B43LEGACY_PIO_MODE is not set
+CONFIG_HOSTAP=m
+CONFIG_HOSTAP_FIRMWARE=y
+CONFIG_HOSTAP_FIRMWARE_NVRAM=y
+CONFIG_HOSTAP_PLX=m
+CONFIG_HOSTAP_PCI=m
 CONFIG_IPW2100=m
 CONFIG_IPW2100_MONITOR=y
 CONFIG_IPW2100_DEBUG=y
@@ -615,38 +871,57 @@ CONFIG_IPW2200_RADIOTAP=y
 CONFIG_IPW2200_PROMISCUOUS=y
 CONFIG_IPW2200_QOS=y
 CONFIG_IPW2200_DEBUG=y
+CONFIG_LIBIPW=m
+# CONFIG_LIBIPW_DEBUG is not set
+CONFIG_IWLWIFI=m
+CONFIG_IWLWIFI_SPECTRUM_MEASUREMENT=y
+# CONFIG_IWLWIFI_DEBUG is not set
+CONFIG_IWLAGN=m
+CONFIG_IWL4965=y
+CONFIG_IWL5000=y
+CONFIG_IWL3945=m
+CONFIG_IWL3945_SPECTRUM_MEASUREMENT=y
 CONFIG_LIBERTAS=m
 # CONFIG_LIBERTAS_DEBUG is not set
 CONFIG_HERMES=m
+# CONFIG_HERMES_CACHE_FW_ON_INIT is not set
 CONFIG_PLX_HERMES=m
 CONFIG_TMD_HERMES=m
 CONFIG_NORTEL_HERMES=m
 CONFIG_PCI_HERMES=m
-CONFIG_ATMEL=m
-CONFIG_PCI_ATMEL=m
-CONFIG_PRISM54=m
-CONFIG_HOSTAP=m
-CONFIG_HOSTAP_FIRMWARE=y
-CONFIG_HOSTAP_FIRMWARE_NVRAM=y
-CONFIG_HOSTAP_PLX=m
-CONFIG_HOSTAP_PCI=m
-CONFIG_BCM43XX=m
-CONFIG_BCM43XX_DEBUG=y
-CONFIG_BCM43XX_DMA=y
-CONFIG_BCM43XX_PIO=y
-CONFIG_BCM43XX_DMA_AND_PIO_MODE=y
-# CONFIG_BCM43XX_DMA_MODE is not set
-# CONFIG_BCM43XX_PIO_MODE is not set
+CONFIG_P54_COMMON=m
+CONFIG_P54_PCI=m
+CONFIG_P54_LEDS=y
+CONFIG_RT2X00=m
+CONFIG_RT2400PCI=m
+CONFIG_RT2500PCI=m
+CONFIG_RT61PCI=m
+CONFIG_RT2800PCI_PCI=m
+CONFIG_RT2800PCI=m
+CONFIG_RT2800_LIB=m
+CONFIG_RT2X00_LIB_PCI=m
+CONFIG_RT2X00_LIB=m
+CONFIG_RT2X00_LIB_HT=y
+CONFIG_RT2X00_LIB_FIRMWARE=y
+CONFIG_RT2X00_LIB_CRYPTO=y
+CONFIG_RT2X00_LIB_LEDS=y
+# CONFIG_RT2X00_DEBUG is not set
+CONFIG_WL12XX=m
+CONFIG_WL1251=m
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
 # CONFIG_WAN is not set
 # CONFIG_FDDI is not set
 # CONFIG_HIPPI is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
 # CONFIG_NET_FC is not set
-# CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_VMXNET3 is not set
 # CONFIG_ISDN is not set
 # CONFIG_PHONE is not set
 
@@ -664,13 +939,16 @@ CONFIG_SERIO_SERPORT=y
 # CONFIG_SERIO_PCIPS2 is not set
 CONFIG_SERIO_LIBPS2=m
 CONFIG_SERIO_RAW=m
+CONFIG_SERIO_ALTERA_PS2=m
 # CONFIG_GAMEPORT is not set
 
 #
 # Character devices
 #
 # CONFIG_VT is not set
+CONFIG_DEVKMEM=y
 # CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_NOZOMI=m
 
 #
 # Serial drivers
@@ -693,95 +971,258 @@ CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
 # CONFIG_IPMI_HANDLER is not set
-# CONFIG_WATCHDOG is not set
 CONFIG_HW_RANDOM=m
-# CONFIG_RTC is not set
+CONFIG_HW_RANDOM_TIMERIOMEM=m
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
-# CONFIG_DRM is not set
 # CONFIG_RAW_DRIVER is not set
 # CONFIG_TCG_TPM is not set
 CONFIG_DEVPORT=y
-# CONFIG_I2C is not set
+CONFIG_I2C=m
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_COMPAT=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCA=m
 
 #
-# SPI support
+# I2C Hardware Bus support
 #
+
+#
+# PC SMBus host controller drivers
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI1563=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_ISCH=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+CONFIG_I2C_OCORES=m
+CONFIG_I2C_SIMTEC=m
+
+#
+# External I2C/SMBus adapter drivers
+#
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_TAOS_EVM=m
+
+#
+# Other I2C/SMBus bus drivers
+#
+CONFIG_I2C_PCA_PLATFORM=m
+CONFIG_I2C_STUB=m
+
+#
+# Miscellaneous I2C Chip support
+#
+CONFIG_SENSORS_TSL2550=m
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
 # CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
+
+#
+# PPS support
+#
+CONFIG_PPS=m
+# CONFIG_PPS_DEBUG is not set
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 # CONFIG_HWMON is not set
+CONFIG_THERMAL=m
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+CONFIG_SSB=m
+CONFIG_SSB_SPROM=y
+CONFIG_SSB_PCIHOST_POSSIBLE=y
+CONFIG_SSB_PCIHOST=y
+CONFIG_SSB_B43_PCI_BRIDGE=y
+# CONFIG_SSB_SILENT is not set
+# CONFIG_SSB_DEBUG is not set
+CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y
+CONFIG_SSB_DRIVER_PCICORE=y
+# CONFIG_SSB_DRIVER_MIPS is not set
 
 #
 # Multifunction device drivers
 #
+# CONFIG_MFD_CORE is not set
 # CONFIG_MFD_SM501 is not set
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-# CONFIG_DVB_CORE is not set
-# CONFIG_DAB is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_WM8400 is not set
+CONFIG_MFD_WM8350=m
+CONFIG_MFD_WM8350_I2C=m
+CONFIG_MFD_PCF50633=m
+CONFIG_PCF50633_ADC=m
+CONFIG_PCF50633_GPIO=m
+CONFIG_AB3100_CORE=m
+CONFIG_AB3100_OTP=m
+# CONFIG_REGULATOR is not set
+# CONFIG_MEDIA_SUPPORT is not set
 
 #
 # Graphics support
 #
+# CONFIG_VGA_ARB is not set
+# CONFIG_DRM is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
 # Display device support
 #
 # CONFIG_DISPLAY_SUPPORT is not set
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
-
-#
-# Sound
-#
 # CONFIG_SOUND is not set
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_USB is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# Enable Host or Gadget support to see Inventra options
 #
 
 #
-# USB Gadget Support
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 # CONFIG_USB_GADGET is not set
+
+#
+# OTG and related infrastructure
+#
+# CONFIG_UWB is not set
 # CONFIG_MMC is not set
-# CONFIG_NEW_LEDS is not set
+# CONFIG_MEMSTICK is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+
+#
+# LED drivers
+#
+CONFIG_LEDS_LP3944=m
+CONFIG_LEDS_PCA955X=m
+CONFIG_LEDS_WM8350=m
+CONFIG_LEDS_BD2802=m
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=m
+CONFIG_LEDS_TRIGGER_BACKLIGHT=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
+# CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
-# CONFIG_RTC_CLASS is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+# CONFIG_RTC_DEBUG is not set
 
 #
-# DMA Engine support
+# RTC interfaces
 #
-# CONFIG_DMA_ENGINE is not set
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
 
 #
-# DMA Clients
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_BQ32K is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+# CONFIG_RTC_DRV_RX8025 is not set
+
+#
+# SPI RTC drivers
 #
 
 #
-# DMA Devices
+# Platform RTC drivers
 #
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+CONFIG_RTC_DRV_M48T35=y
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_MSM6242 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_RP5C01 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+# CONFIG_RTC_DRV_WM8350 is not set
+# CONFIG_RTC_DRV_PCF50633 is not set
+CONFIG_RTC_DRV_AB3100=m
 
 #
-# Userspace I/O
+# on-CPU RTC drivers
 #
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
 CONFIG_UIO=y
 # CONFIG_UIO_CIF is not set
+# CONFIG_UIO_PDRV is not set
+# CONFIG_UIO_PDRV_GENIRQ is not set
+CONFIG_UIO_SMX=m
+CONFIG_UIO_AEC=m
+CONFIG_UIO_SERCOS3=m
+CONFIG_UIO_PCI_GENERIC=m
+
+#
+# TI VLYNQ
+#
+# CONFIG_STAGING is not set
 
 #
 # File systems
@@ -792,35 +1233,57 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
 # CONFIG_EXT2_FS_XIP is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
-# CONFIG_EXT4DEV_FS is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_XATTR=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_EXT4_DEBUG is not set
 CONFIG_JBD=y
-CONFIG_JBD_DEBUG=y
+CONFIG_JBD2=y
 CONFIG_FS_MBCACHE=y
 # CONFIG_REISERFS_FS is not set
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
-CONFIG_XFS_SECURITY=y
 CONFIG_XFS_POSIX_ACL=y
 # CONFIG_XFS_RT is not set
+# CONFIG_XFS_DEBUG is not set
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+# CONFIG_NILFS2_FS is not set
+CONFIG_FILE_LOCKING=y
+CONFIG_FSNOTIFY=y
+CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
+CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QUOTACTL=y
-CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=m
 # CONFIG_AUTOFS4_FS is not set
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
 CONFIG_GENERIC_ACL=y
 
+#
+# Caches
+#
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+# CONFIG_FSCACHE_HISTOGRAM is not set
+# CONFIG_FSCACHE_DEBUG is not set
+# CONFIG_FSCACHE_OBJECT_LIST is not set
+CONFIG_CACHEFILES=m
+# CONFIG_CACHEFILES_DEBUG is not set
+# CONFIG_CACHEFILES_HISTOGRAM is not set
+
 #
 # CD-ROM/DVD Filesystems
 #
@@ -840,16 +1303,13 @@ CONFIG_GENERIC_ACL=y
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
 CONFIG_CONFIGFS_FS=m
-
-#
-# Miscellaneous filesystems
-#
+CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ADFS_FS is not set
 # CONFIG_AFFS_FS is not set
 # CONFIG_ECRYPT_FS is not set
@@ -859,28 +1319,32 @@ CONFIG_CONFIGFS_FS=m
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
 # CONFIG_CRAMFS is not set
+CONFIG_SQUASHFS=m
+# CONFIG_SQUASHFS_EMBEDDED is not set
+CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3
 # CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+CONFIG_OMFS_FS=m
 # CONFIG_HPFS_FS is not set
 # CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
+CONFIG_EXOFS_FS=m
+# CONFIG_EXOFS_DEBUG is not set
+CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
 # CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
 # CONFIG_ROOT_NFS is not set
+# CONFIG_NFSD is not set
 CONFIG_LOCKD=y
 CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_BIND34 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -910,35 +1374,37 @@ CONFIG_SGI_PARTITION=y
 # CONFIG_KARMA_PARTITION is not set
 # CONFIG_EFI_PARTITION is not set
 # CONFIG_SYSV68_PARTITION is not set
-
-#
-# Native Language Support
-#
 # CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
 CONFIG_DLM=m
 # CONFIG_DLM_DEBUG is not set
 
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
 #
 # Kernel hacking
 #
 CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 # CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=2048
 # CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_UNUSED_SYMBOLS is not set
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_DEBUG_KERNEL is not set
-CONFIG_CROSSCOMPILE=y
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_TRACING_SUPPORT=y
+# CONFIG_FTRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+CONFIG_EARLY_PRINTK=y
 # CONFIG_CMDLINE_BOOL is not set
 
 #
@@ -947,65 +1413,140 @@ CONFIG_CROSSCOMPILE=y
 CONFIG_KEYS=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
 # CONFIG_SECURITY is not set
-CONFIG_XOR_BLOCKS=m
-CONFIG_ASYNC_CORE=m
-CONFIG_ASYNC_MEMCPY=m
-CONFIG_ASYNC_XOR=m
+CONFIG_SECURITYFS=y
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
+CONFIG_XOR_BLOCKS=y
+CONFIG_ASYNC_CORE=y
+CONFIG_ASYNC_MEMCPY=y
+CONFIG_ASYNC_XOR=y
+CONFIG_ASYNC_PQ=y
+CONFIG_ASYNC_RAID6_RECOV=y
 CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_ABLKCIPHER=m
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD=m
+CONFIG_CRYPTO_AEAD2=y
 CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG=m
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+CONFIG_CRYPTO_GF128MUL=m
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_WORKQUEUE=y
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_AUTHENC=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_SEQIV=m
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTR=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+
+#
+# Hash modes
+#
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_VMAC=m
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=m
+CONFIG_CRYPTO_GHASH=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA1=m
 CONFIG_CRYPTO_SHA256=m
 CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_DES=y
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_TWOFISH_COMMON=m
-CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_WP512=m
+
+#
+# Ciphers
+#
 CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
+
+#
+# Compression
+#
 CONFIG_CRYPTO_DEFLATE=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_CRC32C=m
-CONFIG_CRYPTO_CAMELLIA=m
-# CONFIG_CRYPTO_TEST is not set
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+
+#
+# Random Number Generation
+#
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_HW=y
+CONFIG_CRYPTO_DEV_HIFN_795X=m
+# CONFIG_CRYPTO_DEV_HIFN_795X_RNG is not set
+# CONFIG_BINARY_PRINTF is not set
 
 #
 # Library routines
 #
 CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
 CONFIG_CRC_CCITT=m
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC16=y
+CONFIG_CRC_T10DIF=m
+CONFIG_CRC_ITU_T=m
 CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
+CONFIG_CRC7=m
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=m
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_PLIST=y
+CONFIG_LZO_COMPRESS=m
+CONFIG_LZO_DECOMPRESS=m
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y

From 63731c964d6cd9de4800891bd33b6f9e47a249bc Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 4 Feb 2010 15:48:49 -0800
Subject: [PATCH 404/640] MIPS: Fix __devinit __cpuinit confusion in
 cpu_cache_init

cpu_cache_init and the things it calls should all be __cpuinit instead
of __devinit.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/938/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/c-octeon.c | 4 ++--
 arch/mips/mm/cache.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 94e05e5733c1..e06f1af760a7 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -174,7 +174,7 @@ static void octeon_flush_cache_page(struct vm_area_struct *vma,
  * Probe Octeon's caches
  *
  */
-static void __devinit probe_octeon(void)
+static void __cpuinit probe_octeon(void)
 {
 	unsigned long icache_size;
 	unsigned long dcache_size;
@@ -235,7 +235,7 @@ static void __devinit probe_octeon(void)
  * Setup the Octeon cache flush routines
  *
  */
-void __devinit octeon_cache_init(void)
+void __cpuinit octeon_cache_init(void)
 {
 	extern unsigned long ebase;
 	extern char except_vec2_octeon;
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 102b2dfa542a..e716cafc346d 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -155,7 +155,7 @@ static inline void setup_protection_map(void)
 	protection_map[15] = PAGE_SHARED;
 }
 
-void __devinit cpu_cache_init(void)
+void __cpuinit cpu_cache_init(void)
 {
 	if (cpu_has_3k_cache) {
 		extern void __weak r3k_cache_init(void);

From c2d5b5e525a354987b9c3de3661133f982bf9ba0 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 6 Feb 2010 09:42:16 +0100
Subject: [PATCH 405/640] MIPS: SNI: Correct NULL test

Test the value that was just allocated rather than the previously tested one.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r@
expression *x;
expression e;
identifier l;
@@

if (x == NULL || ...) {
    ... when forall
    return ...; }
... when != goto l;
    when != x = e
    when != &x
*x == NULL
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
To: linux-mips@linux-mips.org
To: linux-kernel@vger.kernel.org
To: kernel-janitors@vger.kernel.org
Patchwork: http://patchwork.linux-mips.org/patch/945/
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sni/rm200.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c
index 46f00691f448..31e2583ec622 100644
--- a/arch/mips/sni/rm200.c
+++ b/arch/mips/sni/rm200.c
@@ -404,7 +404,7 @@ void __init sni_rm200_i8259_irqs(void)
 	if (!rm200_pic_master)
 		return;
 	rm200_pic_slave = ioremap_nocache(0x160000a0, 4);
-	if (!rm200_pic_master) {
+	if (!rm200_pic_slave) {
 		iounmap(rm200_pic_master);
 		return;
 	}

From 5b7efa898b357e6ebe4024c520e62024eb969b5f Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Mon, 8 Feb 2010 12:27:00 -0800
Subject: [PATCH 406/640] MIPS: Don't probe reserved EntryHi bits.

The patch that adds cpu_probe_vmbits is erroneously writing to reserved
bit 12.  Since we are really only probing high bits, don't write this bit
with a one.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Cc: Guenter Roeck <guenter.roeck@ericsson.com>
Patchwork: http://patchwork.linux-mips.org/patch/949/
Acked-by: Guenter Roeck <guenter.roeck@ericsson.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cpu-probe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 9c187a64649b..758ad426c57f 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -287,9 +287,9 @@ static inline int __cpu_has_fpu(void)
 static inline void cpu_probe_vmbits(struct cpuinfo_mips *c)
 {
 #ifdef __NEED_VMBITS_PROBE
-	write_c0_entryhi(0x3ffffffffffff000ULL);
+	write_c0_entryhi(0x3fffffffffffe000ULL);
 	back_to_back_c0_hazard();
-	c->vmbits = fls64(read_c0_entryhi() & 0x3ffffffffffff000ULL);
+	c->vmbits = fls64(read_c0_entryhi() & 0x3fffffffffffe000ULL);
 #endif
 }
 

From 99fcb766a3a50466fe31d743260a3400c1aee855 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 7 Feb 2010 16:20:18 +0100
Subject: [PATCH 407/640] drm/i915: Update write_domains on active list after
 flush.

Before changing the status of a buffer with a pending write we will await
upon a new flush for that buffer. So we can take advantage of any flushes
posted whilst the buffer is active and pending processing by the GPU, by
clearing its write_domain and updating its last_rendering_seqno -- thus
saving a potential flush in deep queues and improves flushing behaviour
upon eviction for both GTT space and fences.

In order to reduce the time spent searching the active list for matching
write_domains, we move those to a separate list whose elements are
the buffers belong to the active/flushing list with pending writes.

Orignal patch by Chris Wilson <chris@chris-wilson.co.uk>, forward-ported
by me.

In addition to better performance, this also fixes a real bug. Before
this changes, i915_gem_evict_everything didn't work as advertised. When
the gpu was actually busy and processing request, the flush and subsequent
wait would not move active and dirty buffers to the inactive list, but
just to the flushing list. Which triggered the BUG_ON at the end of this
function. With the more tight dirty buffer tracking, all currently busy and
dirty buffers get moved to the inactive list by one i915_gem_flush operation.

I've left the BUG_ON I've used to prove this in there.

References:
  Bug 25911 - 2.10.0 causes kernel oops and system hangs
  http://bugs.freedesktop.org/show_bug.cgi?id=25911

  Bug 26101 - [i915] xf86-video-intel 2.10.0 (and git) triggers kernel oops
              within seconds after login
  http://bugs.freedesktop.org/show_bug.cgi?id=26101

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Adam Lantos <hege@playma.org>
Cc: stable@kernel.org
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.h | 11 +++++++++++
 drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index aaf934d96f21..b99b6a841d95 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -492,6 +492,15 @@ typedef struct drm_i915_private {
 		 */
 		struct list_head flushing_list;
 
+		/**
+		 * List of objects currently pending a GPU write flush.
+		 *
+		 * All elements on this list will belong to either the
+		 * active_list or flushing_list, last_rendering_seqno can
+		 * be used to differentiate between the two elements.
+		 */
+		struct list_head gpu_write_list;
+
 		/**
 		 * LRU list of objects which are not in the ringbuffer and
 		 * are ready to unbind, but are still in the GTT.
@@ -592,6 +601,8 @@ struct drm_i915_gem_object {
 
 	/** This object's place on the active/flushing/inactive lists */
 	struct list_head list;
+	/** This object's place on GPU write list */
+	struct list_head gpu_write_list;
 
 	/** This object's place on the fenced object LRU */
 	struct list_head fence_list;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b4c8c0230689..11daa618385f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1552,6 +1552,8 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
 	else
 		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
 
+	BUG_ON(!list_empty(&obj_priv->gpu_write_list));
+
 	obj_priv->last_rendering_seqno = 0;
 	if (obj_priv->active) {
 		obj_priv->active = 0;
@@ -1622,7 +1624,8 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 		struct drm_i915_gem_object *obj_priv, *next;
 
 		list_for_each_entry_safe(obj_priv, next,
-					 &dev_priv->mm.flushing_list, list) {
+					 &dev_priv->mm.gpu_write_list,
+					 gpu_write_list) {
 			struct drm_gem_object *obj = obj_priv->obj;
 
 			if ((obj->write_domain & flush_domains) ==
@@ -1630,6 +1633,7 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 				uint32_t old_write_domain = obj->write_domain;
 
 				obj->write_domain = 0;
+				list_del_init(&obj_priv->gpu_write_list);
 				i915_gem_object_move_to_active(obj, seqno);
 
 				trace_i915_gem_object_change_domain(obj,
@@ -2084,8 +2088,8 @@ static int
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t seqno;
 	int ret;
+	uint32_t seqno;
 	bool lists_empty;
 
 	spin_lock(&dev_priv->mm.active_list_lock);
@@ -2107,6 +2111,8 @@ i915_gem_evict_everything(struct drm_device *dev)
 	if (ret)
 		return ret;
 
+	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+
 	ret = i915_gem_evict_from_inactive_list(dev);
 	if (ret)
 		return ret;
@@ -2701,7 +2707,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 	old_write_domain = obj->write_domain;
 	i915_gem_flush(dev, 0, obj->write_domain);
 	seqno = i915_add_request(dev, NULL, obj->write_domain);
-	obj->write_domain = 0;
+	BUG_ON(obj->write_domain);
 	i915_gem_object_move_to_active(obj, seqno);
 
 	trace_i915_gem_object_change_domain(obj,
@@ -3850,16 +3856,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		i915_gem_flush(dev,
 			       dev->invalidate_domains,
 			       dev->flush_domains);
-		if (dev->flush_domains)
+		if (dev->flush_domains & I915_GEM_GPU_DOMAINS)
 			(void)i915_add_request(dev, file_priv,
 					       dev->flush_domains);
 	}
 
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_gem_object *obj = object_list[i];
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
 		uint32_t old_write_domain = obj->write_domain;
 
 		obj->write_domain = obj->pending_write_domain;
+		if (obj->write_domain)
+			list_move_tail(&obj_priv->gpu_write_list,
+				       &dev_priv->mm.gpu_write_list);
+		else
+			list_del_init(&obj_priv->gpu_write_list);
+
 		trace_i915_gem_object_change_domain(obj,
 						    obj->read_domains,
 						    old_write_domain);
@@ -4370,6 +4383,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	obj_priv->obj = obj;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+	INIT_LIST_HEAD(&obj_priv->gpu_write_list);
 	INIT_LIST_HEAD(&obj_priv->fence_list);
 	obj_priv->madv = I915_MADV_WILLNEED;
 
@@ -4821,6 +4835,7 @@ i915_gem_load(struct drm_device *dev)
 	spin_lock_init(&dev_priv->mm.active_list_lock);
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
+	INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.request_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);

From fd2e8ea597222b8f38ae8948776a61ea7958232e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 9 Feb 2010 14:14:36 +0000
Subject: [PATCH 408/640] drm/i915: Increase fb alignment to 64k

An untiled framebuffer must be aligned to 64k. This is normally handled
by intel_pin_and_fence_fb_obj(), but the intelfb_create() likes to be
different and do the pinning itself. However, it aligns the buffer
object incorrectly for pre-i965 chipsets causing a PGTBL_ERR when it is
installed onto the output.

Fixes:
  KMS error message while initializing modesetting -
  render error detected: EIR: 0x10 [i915]
  http://bugs.freedesktop.org/show_bug.cgi?id=22936

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 371d753e362b..aaabbcbe5905 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -148,7 +148,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width,
 
 	mutex_lock(&dev->struct_mutex);
 
-	ret = i915_gem_object_pin(fbo, PAGE_SIZE);
+	ret = i915_gem_object_pin(fbo, 64*1024);
 	if (ret) {
 		DRM_ERROR("failed to pin fb: %d\n", ret);
 		goto out_unref;

From ee25df2bc379728c45d81e04cf87984db1425edf Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Sat, 6 Feb 2010 10:41:53 -0800
Subject: [PATCH 409/640] drm/i915: handle FBC and self-refresh better

On 945, we need to avoid entering self-refresh if the compressor is
busy, or we may cause display FIFO underruns leading to ugly flicker.

Fixes fdo bug #24314, kernel bug #15043.

Tested-by: Alexander Lam <lambchop468@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Tested-by: Julien Cristau <jcristau@debian.org> (fd.o #25371)
Cc: stable@kernel.org
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_reg.h      | 1 +
 drivers/gpu/drm/i915/intel_display.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 847006c5218e..ab1bd2d3d3b6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -338,6 +338,7 @@
 #define   FBC_CTL_PERIODIC	(1<<30)
 #define   FBC_CTL_INTERVAL_SHIFT (16)
 #define   FBC_CTL_UNCOMPRESSIBLE (1<<14)
+#define   FBC_C3_IDLE		(1<<13)
 #define   FBC_CTL_STRIDE_SHIFT	(5)
 #define   FBC_CTL_FENCENO	(1<<0)
 #define FBC_COMMAND		0x0320c
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7e9c835f9ae0..a4d382c8bf58 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1031,6 +1031,8 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 
 	/* enable it... */
 	fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
+	if (IS_I945GM(dev))
+		fbc_ctl |= FBC_C3_IDLE; /* 945 needs special SR handling */
 	fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
 	fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
 	if (obj_priv->tiling_mode != I915_TILING_NONE)

From b1b87f6b65a770a69f3632cf7c1f9182547c1249 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Tue, 26 Jan 2010 14:40:05 -0800
Subject: [PATCH 410/640] drm/i915: untangle page flip completion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a new page flip is requested, we need to both queue an unpin for
the current framebuffer, and also increment the flip pending count on
the newly submitted buffer.

At flip finish time, we need to unpin the old fb and decrement the flip
pending count on the new buffer.

The old code was conflating the two, and led to hangs when new direct
rendered apps were started, replacing the existing frame buffer.  This
patch splits out the buffers and prevents the hangs.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a4d382c8bf58..dc6ffe82d2cd 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4081,7 +4081,8 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
 struct intel_unpin_work {
 	struct work_struct work;
 	struct drm_device *dev;
-	struct drm_gem_object *obj;
+	struct drm_gem_object *old_fb_obj;
+	struct drm_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
 	int pending;
 };
@@ -4092,8 +4093,8 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
-	i915_gem_object_unpin(work->obj);
-	drm_gem_object_unreference(work->obj);
+	i915_gem_object_unpin(work->old_fb_obj);
+	drm_gem_object_unreference(work->old_fb_obj);
 	mutex_unlock(&work->dev->struct_mutex);
 	kfree(work);
 }
@@ -4117,7 +4118,7 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe)
 	work = intel_crtc->unpin_work;
 	if (work == NULL || !work->pending) {
 		if (work && !work->pending) {
-			obj_priv = work->obj->driver_private;
+			obj_priv = work->pending_flip_obj->driver_private;
 			DRM_DEBUG_DRIVER("flip finish: %p (%d) not pending?\n",
 					 obj_priv,
 					 atomic_read(&obj_priv->pending_flip));
@@ -4142,7 +4143,7 @@ void intel_finish_page_flip(struct drm_device *dev, int pipe)
 
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
-	obj_priv = work->obj->driver_private;
+	obj_priv = work->pending_flip_obj->driver_private;
 
 	/* Initial scanout buffer will have a 0 pending flip count */
 	if ((atomic_read(&obj_priv->pending_flip) == 0) ||
@@ -4191,7 +4192,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	work->event = event;
 	work->dev = crtc->dev;
 	intel_fb = to_intel_framebuffer(crtc->fb);
-	work->obj = intel_fb->obj;
+	work->old_fb_obj = intel_fb->obj;
 	INIT_WORK(&work->work, intel_unpin_work_fn);
 
 	/* We borrow the event spin lock for protecting unpin_work */
@@ -4220,13 +4221,14 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	}
 
 	/* Reference the old fb object for the scheduled work. */
-	drm_gem_object_reference(work->obj);
+	drm_gem_object_reference(work->old_fb_obj);
 
 	crtc->fb = fb;
 	i915_gem_object_flush_write_domain(obj);
 	drm_vblank_get(dev, intel_crtc->pipe);
 	obj_priv = obj->driver_private;
 	atomic_inc(&obj_priv->pending_flip);
+	work->pending_flip_obj = obj;
 
 	BEGIN_LP_RING(4);
 	OUT_RING(MI_DISPLAY_FLIP |

From f072d2e77128c5b332ce217764cf170b660b99dc Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Tue, 9 Feb 2010 09:46:19 +0800
Subject: [PATCH 411/640] drm/i915: fix flip done interrupt on Ironlake

On Ironlake plane flip interrupt means flip done event already, the
behavior is not like old chips, and perform like other usual interrupt.
So only need to handle flip done event when receiving that interrupt.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_irq.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 50ddf4a95c5e..a17d6bdfe63e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -309,22 +309,22 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 	if (de_iir & DE_GSE)
 		ironlake_opregion_gse_intr(dev);
 
-	if (de_iir & DE_PLANEA_FLIP_DONE)
+	if (de_iir & DE_PLANEA_FLIP_DONE) {
 		intel_prepare_page_flip(dev, 0);
-
-	if (de_iir & DE_PLANEB_FLIP_DONE)
-		intel_prepare_page_flip(dev, 1);
-
-	if (de_iir & DE_PIPEA_VBLANK) {
-		drm_handle_vblank(dev, 0);
 		intel_finish_page_flip(dev, 0);
 	}
 
-	if (de_iir & DE_PIPEB_VBLANK) {
-		drm_handle_vblank(dev, 1);
+	if (de_iir & DE_PLANEB_FLIP_DONE) {
+		intel_prepare_page_flip(dev, 1);
 		intel_finish_page_flip(dev, 1);
 	}
 
+	if (de_iir & DE_PIPEA_VBLANK)
+		drm_handle_vblank(dev, 0);
+
+	if (de_iir & DE_PIPEB_VBLANK)
+		drm_handle_vblank(dev, 1);
+
 	/* check event from PCH */
 	if ((de_iir & DE_PCH_EVENT) &&
 	    (pch_iir & SDE_HOTPLUG_MASK)) {

From aacef09b59e99d9e919ede74d107d5d7f3721432 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Tue, 9 Feb 2010 09:46:20 +0800
Subject: [PATCH 412/640] drm/i915: fix pipe source image setting in flip
 command

The MI_DISPLAY_FLIP command needs to be set the same pipe
source image like in pipe source register, e.g source image
size minus one. This fixes screen corrupt issue on Ironlake.

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index dc6ffe82d2cd..c161ace7132d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4180,7 +4180,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_unpin_work *work;
 	unsigned long flags;
-	int ret;
+	int pipesrc_reg = (intel_crtc->pipe == 0) ? PIPEASRC : PIPEBSRC;
+	int ret, pipesrc;
 	RING_LOCALS;
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -4236,7 +4237,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	OUT_RING(fb->pitch);
 	if (IS_I965G(dev)) {
 		OUT_RING(obj_priv->gtt_offset | obj_priv->tiling_mode);
-		OUT_RING((fb->width << 16) | fb->height);
+		pipesrc = I915_READ(pipesrc_reg); 
+		OUT_RING(pipesrc & 0x0fff0fff);
 	} else {
 		OUT_RING(obj_priv->gtt_offset);
 		OUT_RING(MI_NOOP);

From a40e8d3139e9eb54bf1d29f91639a6c5e05f652e Mon Sep 17 00:00:00 2001
From: Owain Ainsworth <zerooa@googlemail.com>
Date: Tue, 9 Feb 2010 14:25:55 +0000
Subject: [PATCH 413/640] drm/i915: Correctly return -ENOMEM on allocation
 failure in cmdbuf ioctls.

Signed-off-by: Owain G. Ainsworth <oga@openbsd.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_dma.c | 4 +++-
 drivers/gpu/drm/i915/i915_gem.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e660ac07f3b2..2307f98349f7 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -735,8 +735,10 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 	if (cmdbuf->num_cliprects) {
 		cliprects = kcalloc(cmdbuf->num_cliprects,
 				    sizeof(struct drm_clip_rect), GFP_KERNEL);
-		if (cliprects == NULL)
+		if (cliprects == NULL) {
+			ret = -ENOMEM;
 			goto fail_batch_free;
+		}
 
 		ret = copy_from_user(cliprects, cmdbuf->cliprects,
 				     cmdbuf->num_cliprects *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 11daa618385f..ec8a0d7ffa39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3688,8 +3688,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->num_cliprects != 0) {
 		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
 				    GFP_KERNEL);
-		if (cliprects == NULL)
+		if (cliprects == NULL) {
+			ret = -ENOMEM;
 			goto pre_mutex_err;
+		}
 
 		ret = copy_from_user(cliprects,
 				     (struct drm_clip_rect __user *)

From dd19e44b28b12f7ea59ebb54d8ea18054da7f9d1 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Sat, 30 Jan 2010 15:41:00 +0100
Subject: [PATCH 414/640] drm/nouveau: move dereferences after null checks

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_connector.c |  7 ++++---
 drivers/gpu/drm/nouveau/nouveau_object.c    |  3 ++-
 drivers/gpu/drm/nouveau/nouveau_sgdma.c     |  7 ++++---
 drivers/gpu/drm/nouveau/nv50_crtc.c         | 11 +++++++----
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 7e6d673f3a23..d2f63353ea97 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -88,13 +88,14 @@ nouveau_connector_destroy(struct drm_connector *drm_connector)
 {
 	struct nouveau_connector *nv_connector =
 		nouveau_connector(drm_connector);
-	struct drm_device *dev = nv_connector->base.dev;
-
-	NV_DEBUG_KMS(dev, "\n");
+	struct drm_device *dev;
 
 	if (!nv_connector)
 		return;
 
+	dev = nv_connector->base.dev;
+	NV_DEBUG_KMS(dev, "\n");
+
 	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(drm_connector);
 	drm_connector_cleanup(drm_connector);
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 6c2cf81716df..e7c100ba63a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -885,11 +885,12 @@ int
 nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
 		      struct nouveau_gpuobj **gpuobj_ret)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct drm_nouveau_private *dev_priv;
 	struct nouveau_gpuobj *gpuobj;
 
 	if (!chan || !gpuobj_ret || *gpuobj_ret != NULL)
 		return -EINVAL;
+	dev_priv = chan->dev->dev_private;
 
 	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
 	if (!gpuobj)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 4c7f1e403e80..ed1590577b6c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -54,11 +54,12 @@ static void
 nouveau_sgdma_clear(struct ttm_backend *be)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
-	struct drm_device *dev = nvbe->dev;
-
-	NV_DEBUG(nvbe->dev, "\n");
+	struct drm_device *dev;
 
 	if (nvbe && nvbe->pages) {
+		dev = nvbe->dev;
+		NV_DEBUG(dev, "\n");
+
 		if (nvbe->bound)
 			be->func->unbind(be);
 
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 40b7360841f8..d1a651e3400c 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -298,14 +298,17 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 static void
 nv50_crtc_destroy(struct drm_crtc *crtc)
 {
-	struct drm_device *dev = crtc->dev;
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-
-	NV_DEBUG_KMS(dev, "\n");
+	struct drm_device *dev;
+	struct nouveau_crtc *nv_crtc;
 
 	if (!crtc)
 		return;
 
+	dev = crtc->dev;
+	nv_crtc = nouveau_crtc(crtc);
+
+	NV_DEBUG_KMS(dev, "\n");
+
 	drm_crtc_cleanup(&nv_crtc->base);
 
 	nv50_cursor_fini(nv_crtc);

From 13876c6e5fec94e9ea51b73ac025583dd7655345 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 10 Feb 2010 20:50:34 +1000
Subject: [PATCH 415/640] nouveau: fix state detection with switchable graphics

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_acpi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 1cf488247a16..48227e744753 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -90,21 +90,21 @@ int nouveau_hybrid_setup(struct drm_device *dev)
 {
 	int result;
 
-	if (nouveau_dsm(dev, NOUVEAU_DSM_ACTIVE, NOUVEAU_DSM_ACTIVE_QUERY,
+	if (nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STATE,
 								&result))
 		return -ENODEV;
 
 	NV_INFO(dev, "_DSM hardware status gave 0x%x\n", result);
 
-	if (result & 0x1) {	/* Stamina mode - disable the external GPU */
+	if (result) { /* Ensure that the external GPU is enabled */
+		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL);
+		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED,
+									NULL);
+	} else { /* Stamina mode - disable the external GPU */
 		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_STAMINA,
 									NULL);
 		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STAMINA,
 									NULL);
-	} else {		/* Ensure that the external GPU is enabled */
-		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL);
-		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED,
-									NULL);
 	}
 
 	return 0;

From 6719fc663c6cd30da5dd02d08aaefb031a7a98fd Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 9 Feb 2010 12:31:08 +1000
Subject: [PATCH 416/640] drm/radeon/kms: fix screen clearing before fbcon.

This memset_io was added to debug something way back and got
left behind, memset the fb to black so the borders don't be all white.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_fb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 66055b3d8668..23aa393634f2 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -248,7 +248,7 @@ int radeonfb_create(struct drm_device *dev,
 	if (ret)
 		goto out_unref;
 
-	memset_io(fbptr, 0xff, aligned_size);
+	memset_io(fbptr, 0x0, aligned_size);
 
 	strcpy(info->fix.id, "radeondrmfb");
 

From 84b79f8d2882b0a84330c04839ed4d3cefd2ff77 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 7 Feb 2010 21:48:24 +0100
Subject: [PATCH 417/640] drm/i915: Fix crash while aborting hibernation

Commit cbda12d77ea590082edb6d30bd342a67ebc459e0 (drm/i915: implement
new pm ops for i915) introduced the problem that if s2disk hibernation
is aborted, the system will crash, because i915_pm_freeze() does
nothing, while it should at least reverse some operations carried out
by i915_suspend().

Fix this issue by splitting the i915 suspend into a freeze part a
suspend part, where the latter is not executed before creating a
hibernation image, and the i915 resume into a "low-level" resume part
and a thaw part, where the former is not executed after the image has
been created.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.c | 180 +++++++++++++++++++-------------
 1 file changed, 107 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ecac882e1d54..79beffcf5936 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -174,12 +174,42 @@ const static struct pci_device_id pciidlist[] = {
 MODULE_DEVICE_TABLE(pci, pciidlist);
 #endif
 
-static int i915_suspend(struct drm_device *dev, pm_message_t state)
+static int i915_drm_freeze(struct drm_device *dev)
+{
+	pci_save_state(dev->pdev);
+
+	/* If KMS is active, we do the leavevt stuff here */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		int error = i915_gem_idle(dev);
+		if (error) {
+			dev_err(&dev->pdev->dev,
+				"GEM idle failed, resume might fail\n");
+			return error;
+		}
+		drm_irq_uninstall(dev);
+	}
+
+	i915_save_state(dev);
+
+	return 0;
+}
+
+static void i915_drm_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (!dev || !dev_priv) {
-		DRM_ERROR("dev: %p, dev_priv: %p\n", dev, dev_priv);
+	intel_opregion_free(dev, 1);
+
+	/* Modeset on resume, not lid events */
+	dev_priv->modeset_on_lid = 0;
+}
+
+static int i915_suspend(struct drm_device *dev, pm_message_t state)
+{
+	int error;
+
+	if (!dev || !dev->dev_private) {
+		DRM_ERROR("dev: %p\n", dev);
 		DRM_ERROR("DRM not initialized, aborting suspend.\n");
 		return -ENODEV;
 	}
@@ -187,19 +217,11 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
 	if (state.event == PM_EVENT_PRETHAW)
 		return 0;
 
-	pci_save_state(dev->pdev);
+	error = i915_drm_freeze(dev);
+	if (error)
+		return error;
 
-	/* If KMS is active, we do the leavevt stuff here */
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		if (i915_gem_idle(dev))
-			dev_err(&dev->pdev->dev,
-				"GEM idle failed, resume may fail\n");
-		drm_irq_uninstall(dev);
-	}
-
-	i915_save_state(dev);
-
-	intel_opregion_free(dev, 1);
+	i915_drm_suspend(dev);
 
 	if (state.event == PM_EVENT_SUSPEND) {
 		/* Shut down the device */
@@ -207,45 +229,45 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
 		pci_set_power_state(dev->pdev, PCI_D3hot);
 	}
 
-	/* Modeset on resume, not lid events */
-	dev_priv->modeset_on_lid = 0;
-
 	return 0;
 }
 
-static int i915_resume(struct drm_device *dev)
+static int i915_drm_thaw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
-
-	if (pci_enable_device(dev->pdev))
-		return -1;
-	pci_set_master(dev->pdev);
-
-	i915_restore_state(dev);
-
-	intel_opregion_init(dev, 1);
+	int error = 0;
 
 	/* KMS EnterVT equivalent */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
 
-		ret = i915_gem_init_ringbuffer(dev);
-		if (ret != 0)
-			ret = -1;
+		error = i915_gem_init_ringbuffer(dev);
 		mutex_unlock(&dev->struct_mutex);
 
 		drm_irq_install(dev);
-	}
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+
 		/* Resume the modeset for every activated CRTC */
 		drm_helper_resume_force_mode(dev);
 	}
 
 	dev_priv->modeset_on_lid = 0;
 
-	return ret;
+	return error;
+}
+
+static int i915_resume(struct drm_device *dev)
+{
+	if (pci_enable_device(dev->pdev))
+		return -EIO;
+
+	pci_set_master(dev->pdev);
+
+	i915_restore_state(dev);
+
+	intel_opregion_init(dev, 1);
+
+	return i915_drm_thaw(dev);
 }
 
 /**
@@ -386,57 +408,69 @@ i915_pci_remove(struct pci_dev *pdev)
 	drm_put_dev(dev);
 }
 
-static int
-i915_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+static int i915_pm_suspend(struct device *dev)
 {
-	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int error;
 
-	return i915_suspend(dev, state);
-}
+	if (!drm_dev || !drm_dev->dev_private) {
+		dev_err(dev, "DRM not initialized, aborting suspend.\n");
+		return -ENODEV;
+	}
 
-static int
-i915_pci_resume(struct pci_dev *pdev)
-{
-	struct drm_device *dev = pci_get_drvdata(pdev);
+	error = i915_drm_freeze(drm_dev);
+	if (error)
+		return error;
 
-	return i915_resume(dev);
-}
+	i915_drm_suspend(drm_dev);
 
-static int
-i915_pm_suspend(struct device *dev)
-{
-	return i915_pci_suspend(to_pci_dev(dev), PMSG_SUSPEND);
-}
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
 
-static int
-i915_pm_resume(struct device *dev)
-{
-	return i915_pci_resume(to_pci_dev(dev));
-}
-
-static int
-i915_pm_freeze(struct device *dev)
-{
-	return i915_pci_suspend(to_pci_dev(dev), PMSG_FREEZE);
-}
-
-static int
-i915_pm_thaw(struct device *dev)
-{
-	/* thaw during hibernate, do nothing! */
 	return 0;
 }
 
-static int
-i915_pm_poweroff(struct device *dev)
+static int i915_pm_resume(struct device *dev)
 {
-	return i915_pci_suspend(to_pci_dev(dev), PMSG_HIBERNATE);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	return i915_resume(drm_dev);
 }
 
-static int
-i915_pm_restore(struct device *dev)
+static int i915_pm_freeze(struct device *dev)
 {
-	return i915_pci_resume(to_pci_dev(dev));
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	if (!drm_dev || !drm_dev->dev_private) {
+		dev_err(dev, "DRM not initialized, aborting suspend.\n");
+		return -ENODEV;
+	}
+
+	return i915_drm_freeze(drm_dev);
+}
+
+static int i915_pm_thaw(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+
+	return i915_drm_thaw(drm_dev);
+}
+
+static int i915_pm_poweroff(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int error;
+
+	error = i915_drm_freeze(drm_dev);
+	if (!error)
+		i915_drm_suspend(drm_dev);
+
+	return error;
 }
 
 const struct dev_pm_ops i915_pm_ops = {
@@ -445,7 +479,7 @@ const struct dev_pm_ops i915_pm_ops = {
      .freeze = i915_pm_freeze,
      .thaw = i915_pm_thaw,
      .poweroff = i915_pm_poweroff,
-     .restore = i915_pm_restore,
+     .restore = i915_pm_resume,
 };
 
 static struct vm_operations_struct i915_gem_vm_ops = {

From 75dfca80a610e4e87d3b9ccfb3f520692808697d Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 10 Feb 2010 15:09:44 -0800
Subject: [PATCH 418/640] drm/i915: hold ref on flip object until it completes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will prevent things from falling over if the user frees the flip
buffer before we complete the flip, since we'll hold an internal
reference.

Reported-by: Kristian Høgsberg <krh@bitplanet.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c161ace7132d..b27202d23ebc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4094,6 +4094,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 
 	mutex_lock(&work->dev->struct_mutex);
 	i915_gem_object_unpin(work->old_fb_obj);
+	drm_gem_object_unreference(work->pending_flip_obj);
 	drm_gem_object_unreference(work->old_fb_obj);
 	mutex_unlock(&work->dev->struct_mutex);
 	kfree(work);
@@ -4221,8 +4222,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		return ret;
 	}
 
-	/* Reference the old fb object for the scheduled work. */
+	/* Reference the objects for the scheduled work. */
 	drm_gem_object_reference(work->old_fb_obj);
+	drm_gem_object_reference(obj);
 
 	crtc->fb = fb;
 	i915_gem_object_flush_write_domain(obj);

From f77cef3db357aeea22d82a2aa4f0ef8fbae41d47 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 9 Feb 2010 19:41:55 +0000
Subject: [PATCH 419/640] drm/vmwgfx: Update the user-space interface.

When time-based throttling is implemented, we need to bump minor.
When the old way of detecting scanout is removed, we need to bump major.
In the meantime, this change should not break existing user-space.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c    |  6 ++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |  5 +++--
 include/drm/vmwgfx_drm.h                 | 12 +++++++++---
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 135be9688c90..0eaf68273eaf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -39,10 +39,10 @@
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
 
-#define VMWGFX_DRIVER_DATE "20090724"
+#define VMWGFX_DRIVER_DATE "20100118"
 #define VMWGFX_DRIVER_MAJOR 0
-#define VMWGFX_DRIVER_MINOR 1
-#define VMWGFX_DRIVER_PATCHLEVEL 2
+#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 778851f9f1d6..1c7a316454d8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -48,6 +48,12 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_FIFO_OFFSET:
 		param->value = dev_priv->mmio_start;
 		break;
+	case DRM_VMW_PARAM_HW_CAPS:
+		param->value = dev_priv->capabilities;
+		break;
+	case DRM_VMW_PARAM_FIFO_CAPS:
+		param->value = dev_priv->fifo.capabilities;
+		break;
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c7efbd47ab84..933e90d82866 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -610,9 +610,10 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		 */
 		srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT;
 		srf->scanout = true;
-	} else {
+	} else if (req->scanout)
+		srf->scanout = true;
+	else
 		srf->scanout = false;
-	}
 
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index 2be7e1249b6f..dfaf3c2d2c8e 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -68,7 +68,8 @@
 #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1
 #define DRM_VMW_PARAM_3D               2
 #define DRM_VMW_PARAM_FIFO_OFFSET      3
-
+#define DRM_VMW_PARAM_HW_CAPS          4
+#define DRM_VMW_PARAM_FIFO_CAPS        5
 
 /**
  * struct drm_vmw_getparam_arg
@@ -181,6 +182,8 @@ struct drm_vmw_context_arg {
  * The size of the array should equal the total number of mipmap levels.
  * @shareable: Boolean whether other clients (as identified by file descriptors)
  * may reference this surface.
+ * @scanout: Boolean whether the surface is intended to be used as a
+ * scanout.
  *
  * Input data to the DRM_VMW_CREATE_SURFACE Ioctl.
  * Output data from the DRM_VMW_REF_SURFACE Ioctl.
@@ -192,7 +195,7 @@ struct drm_vmw_surface_create_req {
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
 	uint64_t size_addr;
 	int32_t shareable;
-	uint32_t pad64;
+	int32_t scanout;
 };
 
 /**
@@ -295,6 +298,9 @@ union drm_vmw_surface_reference_arg {
  *
  * @commands: User-space address of a command buffer cast to an uint64_t.
  * @command-size: Size in bytes of the command buffer.
+ * @throttle-us: Sleep until software is less than @throttle_us
+ * microseconds ahead of hardware. The driver may round this value
+ * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
  * uint64_t.
  *
@@ -304,7 +310,7 @@ union drm_vmw_surface_reference_arg {
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
 	uint32_t command_size;
-	uint32_t pad64;
+	uint32_t throttle_us;
 	uint64_t fence_rep;
 };
 

From 3bef35721018d2bac08d0d03979606b65347211e Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 9 Feb 2010 19:41:57 +0000
Subject: [PATCH 420/640] drm/vmwgfx: Report propper
 framebuffer_{max|min}_{width|height}

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index eeba6d1d06e4..31f9afed0a63 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -769,10 +769,10 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 
 	drm_mode_config_init(dev);
 	dev->mode_config.funcs = &vmw_kms_funcs;
-	dev->mode_config.min_width = 640;
-	dev->mode_config.min_height = 480;
-	dev->mode_config.max_width = 2048;
-	dev->mode_config.max_height = 2048;
+	dev->mode_config.min_width = 1;
+	dev->mode_config.min_height = 1;
+	dev->mode_config.max_width = dev_priv->fb_max_width;
+	dev->mode_config.max_height = dev_priv->fb_max_height;
 
 	ret = vmw_kms_init_legacy_display_system(dev_priv);
 

From a87897edbae2d60db7bcb6bb0a75e82013d68305 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 9 Feb 2010 21:29:47 +0000
Subject: [PATCH 421/640] drm/vmwgfx: Drop scanout flag compat and add execbuf
 ioctl parameter members. Bumps major.

Even if this bumps the version to 1 it does not mean the driver is
out of staging. From what we know this is the last backwards
incompatible change to the driver.

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |  6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 17 +----------------
 include/drm/vmwgfx_drm.h                 |  8 ++++++++
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 0eaf68273eaf..3e4e670d3216 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -39,9 +39,9 @@
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
 
-#define VMWGFX_DRIVER_DATE "20100118"
-#define VMWGFX_DRIVER_MAJOR 0
-#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_DATE "20100209"
+#define VMWGFX_DRIVER_MAJOR 1
+#define VMWGFX_DRIVER_MINOR 0
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 933e90d82866..f8fbbc67a406 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -35,11 +35,6 @@
 #define VMW_RES_SURFACE ttm_driver_type1
 #define VMW_RES_STREAM ttm_driver_type2
 
-/* XXX: This isn't a real hardware flag, but just a hack for kernel to
- * know about primary surfaces. Find a better way to accomplish this.
- */
-#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
-
 struct vmw_user_context {
 	struct ttm_base_object base;
 	struct vmw_resource res;
@@ -579,6 +574,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	srf->flags = req->flags;
 	srf->format = req->format;
+	srf->scanout = req->scanout;
 	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
 	srf->num_sizes = 0;
 	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
@@ -604,17 +600,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0))
 		goto out_err1;
 
-	if (srf->flags & SVGA3D_SURFACE_HINT_SCANOUT) {
-		/* we should not send this flag down to hardware since
-		 * its not a official one
-		 */
-		srf->flags &= ~SVGA3D_SURFACE_HINT_SCANOUT;
-		srf->scanout = true;
-	} else if (req->scanout)
-		srf->scanout = true;
-	else
-		srf->scanout = false;
-
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index dfaf3c2d2c8e..c7645f480d12 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -303,15 +303,23 @@ union drm_vmw_surface_reference_arg {
  * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
  * uint64_t.
+ * @version: Allows expanding the execbuf ioctl parameters without breaking
+ * backwards compatibility, since user-space will always tell the kernel
+ * which version it uses.
+ * @flags: Execbuf flags. None currently.
  *
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
+#define DRM_VMW_EXECBUF_VERSION 0
+
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
 	uint32_t command_size;
 	uint32_t throttle_us;
 	uint64_t fence_rep;
+	 uint32_t version;
+	 uint32_t flags;
 };
 
 /**

From 598856407d4e20ebb4de01a91a93d89325924d43 Mon Sep 17 00:00:00 2001
From: Damian Lukowski <damian@tvk.rwth-aachen.de>
Date: Wed, 10 Feb 2010 18:04:08 -0800
Subject: [PATCH 422/640] tcp: fix ICMP-RTO war

Make sure, that TCP has a nonzero RTT estimation after three-way
handshake. Currently, a listening TCP has a value of 0 for srtt,
rttvar and rto right after the three-way handshake is completed
with TCP timestamps disabled.
This will lead to corrupt RTO recalculation and retransmission
flood when RTO is recalculated on backoff reversion as introduced
in "Revert RTO on ICMP destination unreachable"
(f1ecd5d9e7366609d640ff4040304ea197fbc618).
This behaviour can be provoked by connecting to a server which
"responds first" (like SMTP) and rejecting every packet after
the handshake with dest-unreachable, which will lead to softirq
load on the server (up to 30% per socket in some tests).

Thanks to Ilpo Jarvinen for providing debug patches and to
Denys Fedoryshchenko for reporting and testing.

Changes since v3: Removed bad characters in patchfile.

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28e029632493..3fddc69ccccc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5783,11 +5783,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 				/* tcp_ack considers this ACK as duplicate
 				 * and does not calculate rtt.
-				 * Fix it at least with timestamps.
+				 * Force it here.
 				 */
-				if (tp->rx_opt.saw_tstamp &&
-				    tp->rx_opt.rcv_tsecr && !tp->srtt)
-					tcp_ack_saw_tstamp(sk, 0);
+				tcp_ack_update_rtt(sk, 0, 0);
 
 				if (tp->rx_opt.tstamp_ok)
 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

From 85b9e4878f3b16993fba871c0c68d0948ec9c7c6 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 8 Feb 2010 09:57:25 +0000
Subject: [PATCH 423/640] drm/vmwgfx: Fix a circular locking dependency bug.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h  |  3 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 17 +++++++++--------
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c  | 13 +++----------
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 3e4e670d3216..356dc935ec13 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -113,6 +113,7 @@ struct vmw_fifo_state {
 	unsigned long static_buffer_size;
 	bool using_bounce_buffer;
 	uint32_t capabilities;
+	struct mutex fifo_mutex;
 	struct rw_semaphore rwsem;
 };
 
@@ -213,7 +214,7 @@ struct vmw_private {
 	 * Fencing and IRQs.
 	 */
 
-	uint32_t fence_seq;
+	atomic_t fence_seq;
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
 	atomic_t fence_queue_waiters;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 4157547cc6e4..39d43a01d846 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -74,6 +74,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	fifo->reserved_size = 0;
 	fifo->using_bounce_buffer = false;
 
+	mutex_init(&fifo->fifo_mutex);
 	init_rwsem(&fifo->rwsem);
 
 	/*
@@ -117,7 +118,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		 (unsigned int) min,
 		 (unsigned int) fifo->capabilities);
 
-	dev_priv->fence_seq = dev_priv->last_read_sequence;
+	atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence);
 	iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE);
 
 	return vmw_fifo_send_fence(dev_priv, &dummy);
@@ -283,7 +284,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 	uint32_t reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE;
 	int ret;
 
-	down_write(&fifo_state->rwsem);
+	mutex_lock(&fifo_state->fifo_mutex);
 	max = ioread32(fifo_mem + SVGA_FIFO_MAX);
 	min = ioread32(fifo_mem + SVGA_FIFO_MIN);
 	next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
@@ -351,7 +352,7 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 	}
 out_err:
 	fifo_state->reserved_size = 0;
-	up_write(&fifo_state->rwsem);
+	mutex_unlock(&fifo_state->fifo_mutex);
 	return NULL;
 }
 
@@ -426,6 +427,7 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 
 	}
 
+	down_write(&fifo_state->rwsem);
 	if (fifo_state->using_bounce_buffer || reserveable) {
 		next_cmd += bytes;
 		if (next_cmd >= max)
@@ -437,8 +439,9 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 	if (reserveable)
 		iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED);
 	mb();
-	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 	up_write(&fifo_state->rwsem);
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	mutex_unlock(&fifo_state->fifo_mutex);
 }
 
 int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
@@ -451,9 +454,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 
 	fm = vmw_fifo_reserve(dev_priv, bytes);
 	if (unlikely(fm == NULL)) {
-		down_write(&fifo_state->rwsem);
-		*sequence = dev_priv->fence_seq;
-		up_write(&fifo_state->rwsem);
+		*sequence = atomic_read(&dev_priv->fence_seq);
 		ret = -ENOMEM;
 		(void)vmw_fallback_wait(dev_priv, false, true, *sequence,
 					false, 3*HZ);
@@ -461,7 +462,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 	}
 
 	do {
-		*sequence = dev_priv->fence_seq++;
+		*sequence = atomic_add_return(1, &dev_priv->fence_seq);
 	} while (*sequence == 0);
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index d40086fc8647..4d7cb5393860 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -84,20 +84,13 @@ bool vmw_fence_signaled(struct vmw_private *dev_priv,
 	    vmw_fifo_idle(dev_priv, sequence))
 		return true;
 
-	/**
-	 * Below is to signal stale fences that have wrapped.
-	 * First, block fence submission.
-	 */
-
-	down_read(&fifo_state->rwsem);
-
 	/**
 	 * Then check if the sequence is higher than what we've actually
 	 * emitted. Then the fence is stale and signaled.
 	 */
 
-	ret = ((dev_priv->fence_seq - sequence) > VMW_FENCE_WRAP);
-	up_read(&fifo_state->rwsem);
+	ret = ((atomic_read(&dev_priv->fence_seq) - sequence)
+	       > VMW_FENCE_WRAP);
 
 	return ret;
 }
@@ -127,7 +120,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 
 	if (fifo_idle)
 		down_read(&fifo_state->rwsem);
-	signal_seq = dev_priv->fence_seq;
+	signal_seq = atomic_read(&dev_priv->fence_seq);
 	ret = 0;
 
 	for (;;) {

From c60a284cc41f9989391706e113d30b4f27dbe3e0 Mon Sep 17 00:00:00 2001
From: Pauli Nieminen <suokkos@gmail.com>
Date: Thu, 11 Feb 2010 00:10:33 +0200
Subject: [PATCH 424/640] drm/radeon: Skip dma copy test in benchmark if card
 doesn't have dma engine.

radeon_copy_dma is only available for r200 or newer cards.
Call to radeon_copy_dma would result to NULL pointer
dereference if benchmarking asic without dma engine.

Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_benchmark.c | 55 ++++++++++++++---------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 4ddfd4b5bc51..7932dc4d6b90 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -65,31 +65,42 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 	if (r) {
 		goto out_cleanup;
 	}
-	start_jiffies = jiffies;
-	for (i = 0; i < n; i++) {
-		r = radeon_fence_create(rdev, &fence);
-		if (r) {
-			goto out_cleanup;
+
+	/* r100 doesn't have dma engine so skip the test */
+	if (rdev->asic->copy_dma) {
+
+		start_jiffies = jiffies;
+		for (i = 0; i < n; i++) {
+			r = radeon_fence_create(rdev, &fence);
+			if (r) {
+				goto out_cleanup;
+			}
+
+			r = radeon_copy_dma(rdev, saddr, daddr,
+					size / RADEON_GPU_PAGE_SIZE, fence);
+
+			if (r) {
+				goto out_cleanup;
+			}
+			r = radeon_fence_wait(fence, false);
+			if (r) {
+				goto out_cleanup;
+			}
+			radeon_fence_unref(&fence);
 		}
-		r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence);
-		if (r) {
-			goto out_cleanup;
+		end_jiffies = jiffies;
+		time = end_jiffies - start_jiffies;
+		time = jiffies_to_msecs(time);
+		if (time > 0) {
+			i = ((n * size) >> 10) / time;
+			printk(KERN_INFO "radeon: dma %u bo moves of %ukb from"
+					" %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n",
+					n, size >> 10,
+					sdomain, ddomain, time,
+					i, i * 1000, (i * 1000) / 1024);
 		}
-		r = radeon_fence_wait(fence, false);
-		if (r) {
-			goto out_cleanup;
-		}
-		radeon_fence_unref(&fence);
-	}
-	end_jiffies = jiffies;
-	time = end_jiffies - start_jiffies;
-	time = jiffies_to_msecs(time);
-	if (time > 0) {
-		i = ((n * size) >> 10) / time;
-		printk(KERN_INFO "radeon: dma %u bo moves of %ukb from %d to %d"
-		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
-		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
 	}
+
 	start_jiffies = jiffies;
 	for (i = 0; i < n; i++) {
 		r = radeon_fence_create(rdev, &fence);

From 648ac05c4f8a8aea908c7dff81ceffe003e28561 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 10 Feb 2010 16:52:45 +1000
Subject: [PATCH 425/640] drm/radeon/kms: retry auxch on 0x20 timeout value.

ATOM appears to return 0x20 which seems to mean some sort of timeout.

retry the transaction up to 10 times before failing, this
makes DP->VGA convertor we bought work at least a bit more predictably.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/atombios_dp.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 71060114d5de..b32eeea5bb8b 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -332,11 +332,13 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes,
 	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION args;
 	int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
 	unsigned char *base;
+	int retry_count = 0;
 
 	memset(&args, 0, sizeof(args));
 
 	base = (unsigned char *)rdev->mode_info.atom_context->scratch;
 
+retry:
 	memcpy(base, req_bytes, num_bytes);
 
 	args.lpAuxRequest = 0;
@@ -347,10 +349,12 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes,
 
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 
-	if (args.ucReplyStatus) {
-		DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x\n",
+	if (args.ucReplyStatus && !args.ucDataOutLen) {
+		if (args.ucReplyStatus == 0x20 && retry_count < 10)
+			goto retry;
+		DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n",
 			  req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3],
-			  chan->rec.i2c_id, args.ucReplyStatus);
+			  chan->rec.i2c_id, args.ucReplyStatus, retry_count);
 		return false;
 	}
 

From 77c1ff3982c6b36961725dd19e872a1c07df7f3b Mon Sep 17 00:00:00 2001
From: Andy Getzendanner <james.getzendanner@students.olin.edu>
Date: Thu, 11 Feb 2010 14:04:48 +1000
Subject: [PATCH 426/640] vgaarb: fix incorrect dereference of userspace
 pointer.

This patch corrects a userspace pointer dereference in the VGA arbiter
in 2.6.32.1.

copy_from_user() is used at line 822 to copy the contents of buf into
kbuf, but a call to strncmp() on line 964 uses buf rather than kbuf.  This
problem led to a GPF in strncmp() when X was started on my x86_32 systems.
 X triggered the behavior with a write of "target PCI:0000:01:00.0" to
/dev/vga_arbiter.

The patch has been tested against 2.6.32.1 and observed to correct the GPF
observed when starting X or manually writing the string "target
PCI:0000:01:00.0" to /dev/vga_arbiter.

Signed-off-by: Andy Getzendanner <james.getzendanner@students.olin.edu>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/vga/vgaarb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 1ac0c93603c9..24b56dc54597 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -961,7 +961,7 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf,
 		remaining -= 7;
 		pr_devel("client 0x%p called 'target'\n", priv);
 		/* if target is default */
-		if (!strncmp(buf, "default", 7))
+		if (!strncmp(kbuf, "default", 7))
 			pdev = pci_dev_get(vga_default_device());
 		else {
 			if (!vga_pci_str_to_vars(curr_pos, remaining,

From cab4d27764d5a8654212b3e96eb0ae793aec5b94 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 11 Feb 2010 17:15:57 +0100
Subject: [PATCH 427/640] amd64_edac: Do not falsely trigger kerneloops

An unfortunate "WARNING" in the message amd64_edac dumps when the system
doesn't support DRAM ECC or ECC checking is not enabled in the BIOS
used to trigger kerneloops which qualified the message as an OOPS thus
misleading the users. See, e.g.

https://bugs.launchpad.net/ubuntu/+source/linux/+bug/422536
http://bugzilla.kernel.org/show_bug.cgi?id=15238

Downgrade the message level to KERN_NOTICE and fix the formulation.

Cc: stable@kernel.org # .32.x
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Doug Thompson <dougthompson@xmission.com>
---
 drivers/edac/amd64_edac.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 000dc67b85b7..3391e6739d06 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2658,10 +2658,11 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
  * the memory system completely. A command line option allows to force-enable
  * hardware ECC later in amd64_enable_ecc_error_reporting().
  */
-static const char *ecc_warning =
-	"WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
-	" Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
-	" Also, use of the override can cause unknown side effects.\n";
+static const char *ecc_msg =
+	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
+	" Either enable ECC checking or force module loading by setting "
+	"'ecc_enable_override'.\n"
+	" (Note that use of the override may cause unknown side effects.)\n";
 
 static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
 {
@@ -2673,7 +2674,7 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
 
 	ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
 	if (!ecc_enabled)
-		amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
+		amd64_printk(KERN_NOTICE, "This node reports that Memory ECC "
 			     "is currently disabled, set F3x%x[22] (%s).\n",
 			     K8_NBCFG, pci_name(pvt->misc_f3_ctl));
 	else
@@ -2681,13 +2682,13 @@ static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
 
 	nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
 	if (!nb_mce_en)
-		amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
+		amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR "
 			     "0x%08x[4] on node %d to enable.\n",
 			     MSR_IA32_MCG_CTL, pvt->mc_node_id);
 
 	if (!ecc_enabled || !nb_mce_en) {
 		if (!ecc_enable_override) {
-			amd64_printk(KERN_WARNING, "%s", ecc_warning);
+			amd64_printk(KERN_NOTICE, "%s", ecc_msg);
 			return -ENODEV;
 		}
 		ecc_enable_override = 0;

From 440ab7ac2d6b735fb278a1ff1674f6716314c6bb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 Feb 2010 12:29:16 -0800
Subject: [PATCH 428/640] sparc32: Fix thinko in previous change.

Should mask stack with 0xf not "0x15".

Noticed by Blue Swirl <blauwirbel@gmail.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/process_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index f23c8fda6503..c49865b30719 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -526,7 +526,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 			 * Set some valid stack frames to give to the child.
 			 */
 			childstack = (struct sparc_stackf __user *)
-				(sp & ~0x15UL);
+				(sp & ~0xfUL);
 			parentstack = (struct sparc_stackf __user *)
 				regs->u_regs[UREG_FP];
 

From 93716b9470fbfd9efdc7d0f2445cb34635de3f6d Mon Sep 17 00:00:00 2001
From: Marcel Selhorst <m.selhorst@sirrix.com>
Date: Wed, 10 Feb 2010 13:56:32 -0800
Subject: [PATCH 429/640] tpm_infineon: fix suspend/resume handler for
 pnp_driver

When suspending, tpm_infineon calls the generic suspend function of the
TPM framework.  However, the TPM framework does not return and the system
hangs upon suspend.  When sending the necessary command "TPM_SaveState"
directly within the driver, suspending and resuming works fine.

Signed-off-by: Marcel Selhorst <m.selhorst@sirrix.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Debora Velarde <debora@linux.vnet.ibm.com>
Cc: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Cc: <stable@kernel.org>		[2.6.32.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tpm/tpm_infineon.c | 79 ++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 22 deletions(-)

diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index ecba4942fc8e..f58440791e65 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -39,12 +39,12 @@
 struct tpm_inf_dev {
 	int iotype;
 
-	void __iomem *mem_base;		/* MMIO ioremap'd addr */
-	unsigned long map_base;		/* phys MMIO base */
-	unsigned long map_size;		/* MMIO region size */
-	unsigned int index_off;		/* index register offset */
+	void __iomem *mem_base;	/* MMIO ioremap'd addr */
+	unsigned long map_base;	/* phys MMIO base */
+	unsigned long map_size;	/* MMIO region size */
+	unsigned int index_off;	/* index register offset */
 
-	unsigned int data_regs;		/* Data registers */
+	unsigned int data_regs;	/* Data registers */
 	unsigned int data_size;
 
 	unsigned int config_port;	/* IO Port config index reg */
@@ -406,14 +406,14 @@ static const struct tpm_vendor_specific tpm_inf = {
 	.miscdev = {.fops = &inf_ops,},
 };
 
-static const struct pnp_device_id tpm_pnp_tbl[] = {
+static const struct pnp_device_id tpm_inf_pnp_tbl[] = {
 	/* Infineon TPMs */
 	{"IFX0101", 0},
 	{"IFX0102", 0},
 	{"", 0}
 };
 
-MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl);
+MODULE_DEVICE_TABLE(pnp, tpm_inf_pnp_tbl);
 
 static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 				       const struct pnp_device_id *dev_id)
@@ -430,7 +430,7 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 	if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) &&
 	    !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
 
-	    	tpm_dev.iotype = TPM_INF_IO_PORT;
+		tpm_dev.iotype = TPM_INF_IO_PORT;
 
 		tpm_dev.config_port = pnp_port_start(dev, 0);
 		tpm_dev.config_size = pnp_port_len(dev, 0);
@@ -459,9 +459,9 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 			goto err_last;
 		}
 	} else if (pnp_mem_valid(dev, 0) &&
-	           !(pnp_mem_flags(dev, 0) & IORESOURCE_DISABLED)) {
+		   !(pnp_mem_flags(dev, 0) & IORESOURCE_DISABLED)) {
 
-	    	tpm_dev.iotype = TPM_INF_IO_MEM;
+		tpm_dev.iotype = TPM_INF_IO_MEM;
 
 		tpm_dev.map_base = pnp_mem_start(dev, 0);
 		tpm_dev.map_size = pnp_mem_len(dev, 0);
@@ -563,11 +563,11 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 			 "product id 0x%02x%02x"
 			 "%s\n",
 			 tpm_dev.iotype == TPM_INF_IO_PORT ?
-				tpm_dev.config_port :
-				tpm_dev.map_base + tpm_dev.index_off,
+			 tpm_dev.config_port :
+			 tpm_dev.map_base + tpm_dev.index_off,
 			 tpm_dev.iotype == TPM_INF_IO_PORT ?
-				tpm_dev.data_regs :
-				tpm_dev.map_base + tpm_dev.data_regs,
+			 tpm_dev.data_regs :
+			 tpm_dev.map_base + tpm_dev.data_regs,
 			 version[0], version[1],
 			 vendorid[0], vendorid[1],
 			 productid[0], productid[1], chipname);
@@ -607,20 +607,55 @@ static __devexit void tpm_inf_pnp_remove(struct pnp_dev *dev)
 			iounmap(tpm_dev.mem_base);
 			release_mem_region(tpm_dev.map_base, tpm_dev.map_size);
 		}
+		tpm_dev_vendor_release(chip);
 		tpm_remove_hardware(chip->dev);
 	}
 }
 
+static int tpm_inf_pnp_suspend(struct pnp_dev *dev, pm_message_t pm_state)
+{
+	struct tpm_chip *chip = pnp_get_drvdata(dev);
+	int rc;
+	if (chip) {
+		u8 savestate[] = {
+			0, 193,	/* TPM_TAG_RQU_COMMAND */
+			0, 0, 0, 10,	/* blob length (in bytes) */
+			0, 0, 0, 152	/* TPM_ORD_SaveState */
+		};
+		dev_info(&dev->dev, "saving TPM state\n");
+		rc = tpm_inf_send(chip, savestate, sizeof(savestate));
+		if (rc < 0) {
+			dev_err(&dev->dev, "error while saving TPM state\n");
+			return rc;
+		}
+	}
+	return 0;
+}
+
+static int tpm_inf_pnp_resume(struct pnp_dev *dev)
+{
+	/* Re-configure TPM after suspending */
+	tpm_config_out(ENABLE_REGISTER_PAIR, TPM_INF_ADDR);
+	tpm_config_out(IOLIMH, TPM_INF_ADDR);
+	tpm_config_out((tpm_dev.data_regs >> 8) & 0xff, TPM_INF_DATA);
+	tpm_config_out(IOLIML, TPM_INF_ADDR);
+	tpm_config_out((tpm_dev.data_regs & 0xff), TPM_INF_DATA);
+	/* activate register */
+	tpm_config_out(TPM_DAR, TPM_INF_ADDR);
+	tpm_config_out(0x01, TPM_INF_DATA);
+	tpm_config_out(DISABLE_REGISTER_PAIR, TPM_INF_ADDR);
+	/* disable RESET, LP and IRQC */
+	tpm_data_out(RESET_LP_IRQC_DISABLE, CMD);
+	return tpm_pm_resume(&dev->dev);
+}
+
 static struct pnp_driver tpm_inf_pnp_driver = {
 	.name = "tpm_inf_pnp",
-	.driver = {
-		.owner = THIS_MODULE,
-		.suspend = tpm_pm_suspend,
-		.resume = tpm_pm_resume,
-	},
-	.id_table = tpm_pnp_tbl,
+	.id_table = tpm_inf_pnp_tbl,
 	.probe = tpm_inf_pnp_probe,
-	.remove = __devexit_p(tpm_inf_pnp_remove),
+	.suspend = tpm_inf_pnp_suspend,
+	.resume = tpm_inf_pnp_resume,
+	.remove = __devexit_p(tpm_inf_pnp_remove)
 };
 
 static int __init init_inf(void)
@@ -638,5 +673,5 @@ module_exit(cleanup_inf);
 
 MODULE_AUTHOR("Marcel Selhorst <m.selhorst@sirrix.com>");
 MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2");
-MODULE_VERSION("1.9");
+MODULE_VERSION("1.9.2");
 MODULE_LICENSE("GPL");

From c286d03cce118e9fb8dda8da43f9131c169c5a75 Mon Sep 17 00:00:00 2001
From: Johan Kristell <johan.kristell@axis.com>
Date: Wed, 10 Feb 2010 13:56:34 -0800
Subject: [PATCH 430/640] mmc_test: block addressed cards

This patch fixes a bug in the multiblock write tests where the written
data is read back for verifying one block at a time.  The tests in
mmc_test assumes that all cards are byte addressable.

This will cause the multi block write tests to fail, leading the user of
the mmc_test driver thinking there is something wrong with the sdhci
driver they are testing.

The start address for the block is calculated as: blocknum * 512. For
block addressable cards the blocknum alone should be used.

Signed-off-by: Johan Kristell <johan.kristell@axis.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/card/mmc_test.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index b9f1e84897cc..e7f8027165e6 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -74,6 +74,9 @@ static void mmc_test_prepare_mrq(struct mmc_test_card *test,
 	}
 
 	mrq->cmd->arg = dev_addr;
+	if (!mmc_card_blockaddr(test->card))
+		mrq->cmd->arg <<= 9;
+
 	mrq->cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 
 	if (blocks == 1)
@@ -190,7 +193,7 @@ static int __mmc_test_prepare(struct mmc_test_card *test, int write)
 	}
 
 	for (i = 0;i < BUFFER_SIZE / 512;i++) {
-		ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1);
+		ret = mmc_test_buffer_transfer(test, test->buffer, i, 512, 1);
 		if (ret)
 			return ret;
 	}
@@ -219,7 +222,7 @@ static int mmc_test_cleanup(struct mmc_test_card *test)
 	memset(test->buffer, 0, 512);
 
 	for (i = 0;i < BUFFER_SIZE / 512;i++) {
-		ret = mmc_test_buffer_transfer(test, test->buffer, i * 512, 512, 1);
+		ret = mmc_test_buffer_transfer(test, test->buffer, i, 512, 1);
 		if (ret)
 			return ret;
 	}
@@ -426,7 +429,7 @@ static int mmc_test_transfer(struct mmc_test_card *test,
 		for (i = 0;i < sectors;i++) {
 			ret = mmc_test_buffer_transfer(test,
 				test->buffer + i * 512,
-				dev_addr + i * 512, 512, 0);
+				dev_addr + i, 512, 0);
 			if (ret)
 				return ret;
 		}

From cff9279e4e8d6ff80a640dd6977c8f76aa01e1f8 Mon Sep 17 00:00:00 2001
From: Peter Tyser <ptyser@xes-inc.com>
Date: Wed, 10 Feb 2010 13:56:36 -0800
Subject: [PATCH 431/640] edac: mpc85xx fix bad page calculation

Commit b4846251727a38a7f248e41308c060995371dd05 ("edac: mpc85xx add
mpc83xx support") accidentally broke how a chip select's first and last
page addresses are calculated.  The page addresses are being shifted too
far right by PAGE_SHIFT.  This results in errors such as:

  EDAC MPC85xx MC1: Err addr: 0x003075c0
  EDAC MPC85xx MC1: PFN: 0x00000307
  EDAC MPC85xx MC1: PFN out of range!
  EDAC MC1: INTERNAL ERROR: row out of range (4 >= 4)
  EDAC MC1: CE - no information available: INTERNAL ERROR

The vaule of PAGE_SHIFT is already being taken into consideration during
the calculation of the 'start' and 'end' variables, thus it is not
necessary to account for it again when setting a chip select's first and
last page address.

Signed-off-by: Peter Tyser <ptyser@xes-inc.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Cc: Ira W. Snyder <iws@ovro.caltech.edu>
Cc: Kumar Gala <galak@gate.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/mpc85xx_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index cf27402af97b..e24a87fe3b9b 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -804,8 +804,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 		end   <<= (24 - PAGE_SHIFT);
 		end    |= (1 << (24 - PAGE_SHIFT)) - 1;
 
-		csrow->first_page = start >> PAGE_SHIFT;
-		csrow->last_page = end >> PAGE_SHIFT;
+		csrow->first_page = start;
+		csrow->last_page = end;
 		csrow->nr_pages = end + 1 - start;
 		csrow->grain = 8;
 		csrow->mtype = mtype;

From f8c63345b498a8590e8e87a5990a36cdf89636df Mon Sep 17 00:00:00 2001
From: Peter Tyser <ptyser@xes-inc.com>
Date: Wed, 10 Feb 2010 13:56:37 -0800
Subject: [PATCH 432/640] edac: mpc85xx fix build regression by removing unused
 debug code

Some unused, unsupported debug code existed in the mpc85xx EDAC driver
that resulted in a build failure when CONFIG_EDAC_DEBUG was defined:

  drivers/edac/mpc85xx_edac.c: In function 'mpc85xx_mc_err_probe':
  drivers/edac/mpc85xx_edac.c:1031: error: implicit declaration of function 'edac_mc_register_mcidev_debug'
  drivers/edac/mpc85xx_edac.c:1031: error: 'debug_attr' undeclared (first use in this function)
  drivers/edac/mpc85xx_edac.c:1031: error: (Each undeclared identifier is reported only once
  drivers/edac/mpc85xx_edac.c:1031: error: for each function it appears in.)

Signed-off-by: Peter Tyser <ptyser@xes-inc.com>
Signed-off-by: Doug Thompson <dougthompson@xmission.com
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/mpc85xx_edac.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index e24a87fe3b9b..ecd5928d7110 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -892,10 +892,6 @@ static int __devinit mpc85xx_mc_err_probe(struct of_device *op,
 
 	mpc85xx_init_csrows(mci);
 
-#ifdef CONFIG_EDAC_DEBUG
-	edac_mc_register_mcidev_debug((struct attribute **)debug_attr);
-#endif
-
 	/* store the original error disable bits */
 	orig_ddr_err_disable =
 	    in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE);

From 763458e0dbfb4d562d62823149cf62e8b8eca82b Mon Sep 17 00:00:00 2001
From: Rishikesh <risrajak@linux.vnet.ibm.com>
Date: Wed, 10 Feb 2010 13:56:40 -0800
Subject: [PATCH 433/640] MAINTAINERS: changed LTP maintainership
 responsibilities

Change the LTP maintainer responsibities from 2010.

Ref: http://marc.info/?l=ltp-list&m=126502242912536&w=2

Signed-off-by : Rishikesh K Rajak <risrajak@linux.vnet.ibm.com>
Cc: Subrata Modak <subrata@linux.vnet.ibm.com>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Garrett Cooper <yanegomi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 602022d2c7a5..412eff60c33d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3411,8 +3411,10 @@ S:	Maintained
 F:	drivers/scsi/sym53c8xx_2/
 
 LTP (Linux Test Project)
-M:	Subrata Modak <subrata@linux.vnet.ibm.com>
-M:	Mike Frysinger <vapier@gentoo.org>
+M:	Rishikesh K Rajak <risrajak@linux.vnet.ibm.com>
+M:	Garrett Cooper <yanegomi@gmail.com>
+M:     Mike Frysinger <vapier@gentoo.org>
+M:     Subrata Modak <subrata@linux.vnet.ibm.com>
 L:	ltp-list@lists.sourceforge.net (subscribers-only)
 W:	http://ltp.sourceforge.net/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/galak/ltp.git

From 4cfbafd33f5ae99688ab82525a1d449c1c1b198f Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@linux-m68k.org>
Date: Wed, 10 Feb 2010 13:56:40 -0800
Subject: [PATCH 434/640] compat_ioctl: add compat handler for TIOCGSID ioctl

This is used by tcgetsid(3).

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b6f23b25370e..30698a13fb22 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -936,6 +936,7 @@ COMPATIBLE_IOCTL(TCSETSF)
 COMPATIBLE_IOCTL(TIOCLINUX)
 COMPATIBLE_IOCTL(TIOCSBRK)
 COMPATIBLE_IOCTL(TIOCCBRK)
+COMPATIBLE_IOCTL(TIOCGSID)
 COMPATIBLE_IOCTL(TIOCGICOUNT)
 /* Little t */
 COMPATIBLE_IOCTL(TIOCGETD)

From 803bf5ec259941936262d10ecc84511b76a20921 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 10 Feb 2010 13:56:42 -0800
Subject: [PATCH 435/640] fs/exec.c: restrict initial stack space expansion to
 rlimit

When reserving stack space for a new process, make sure we're not
attempting to expand the stack by more than rlimit allows.

This fixes a bug caused by b6a2fea39318e43fee84fa7b0b90d68bed92d2ba ("mm:
variable length argument support") and unmasked by
fc63cf237078c86214abcb2ee9926d8ad289da9b ("exec: setup_arg_pages() fails
to return errors").

This bug means that when limiting the stack to less the 20*PAGE_SIZE (eg.
80K on 4K pages or 'ulimit -s 79') all processes will be killed before
they start.  This is particularly bad with 64K pages, where a ulimit below
1280K will kill every process.

To test, do:

  'ulimit -s 15; ls'

before and after the patch is applied.  Before it's applied, 'ls' should
be killed.  After the patch is applied, 'ls' should no longer be killed.

A stack limit of 15KB since it's small enough to trigger 20*PAGE_SIZE.
Also 15KB not a multiple of PAGE_SIZE, which is a trickier case to handle
correctly with this code.

4K pages should be fine to test with.

[kosaki.motohiro@jp.fujitsu.com: cleanup]
[akpm@linux-foundation.org: cleanup cleanup]
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0790a107ff7e..e95c692ef0e4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	struct vm_area_struct *prev = NULL;
 	unsigned long vm_flags;
 	unsigned long stack_base;
+	unsigned long stack_size;
+	unsigned long stack_expand;
+	unsigned long rlim_stack;
 
 #ifdef CONFIG_STACK_GROWSUP
 	/* Limit stack size to 1GB */
@@ -627,10 +630,24 @@ int setup_arg_pages(struct linux_binprm *bprm,
 			goto out_unlock;
 	}
 
+	stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+	stack_size = vma->vm_end - vma->vm_start;
+	/*
+	 * Align this down to a page boundary as expand_stack
+	 * will align it up.
+	 */
+	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+	rlim_stack = min(rlim_stack, stack_size);
 #ifdef CONFIG_STACK_GROWSUP
-	stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+	if (stack_size + stack_expand > rlim_stack)
+		stack_base = vma->vm_start + rlim_stack;
+	else
+		stack_base = vma->vm_end + stack_expand;
 #else
-	stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+	if (stack_size + stack_expand > rlim_stack)
+		stack_base = vma->vm_end - rlim_stack;
+	else
+		stack_base = vma->vm_start - stack_expand;
 #endif
 	ret = expand_stack(vma, stack_base);
 	if (ret)

From 0e5a9fb0426108d750c97c25b1ab04d3768b5aff Mon Sep 17 00:00:00 2001
From: Abhijith Das <adas@redhat.com>
Date: Fri, 5 Feb 2010 18:25:41 -0500
Subject: [PATCH 436/640] GFS2: Fix error code

We need this one-liner to signal the mount helper of the 'insufficient journals' condition.

Signed-off-by: Abhijith Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_fstype.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8a102f731003..a86ed6381566 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -725,7 +725,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		goto fail;
 	}
 
-	error = -EINVAL;
+	error = -EUSERS;
 	if (!gfs2_jindex_size(sdp)) {
 		fs_err(sdp, "no journals!\n");
 		goto fail_jindex;

From 07ccb7bf2c928fef4fea2cda69ba2e23479578db Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 12 Feb 2010 10:10:55 +0000
Subject: [PATCH 437/640] GFS2: Fix bmap allocation corner-case bug

This patch solves a corner case during allocation which occurs if both
metadata (indirect) and data blocks are required but there is an
obstacle in the filesystem (e.g. a resource group header or another
allocated block) such that when the allocation is requested only
enough blocks for the metadata are returned.

By changing the exit condition of this loop, we ensure that a
minimum of one data block will always be returned.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d47379e794b..583e823307ae 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -541,7 +541,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 				*ptr++ = cpu_to_be64(bn++);
 			break;
 		}
-	} while (state != ALLOC_DATA);
+	} while ((state != ALLOC_DATA) || !dblock);
 
 	ip->i_height = height;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);

From 973e9a2795b3b41d8408a0bb6f87b783c5efc88a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 11 Feb 2010 19:20:48 +0000
Subject: [PATCH 438/640] regulator: Fix display of null constraints for
 regulators

If the regulator constraints are empty and there is no voltage
reported then nothing will be added to the text displayed for the
constraints, leading to random stack data being printed. This is
unlikely to happen for practical regulators since most will at
least report a voltage but should still be fixed.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 686ef270ecf7..b60a4c9f8f16 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -661,7 +661,7 @@ static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state)
 static void print_constraints(struct regulator_dev *rdev)
 {
 	struct regulation_constraints *constraints = rdev->constraints;
-	char buf[80];
+	char buf[80] = "";
 	int count = 0;
 	int ret;
 

From 62737d445b149eaf0beac50de8d856b5e94150be Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Fri, 12 Feb 2010 12:30:21 +0100
Subject: [PATCH 439/640] regulator/lp3971: vol_map out of bounds in
 lp3971_{ldo,dcdc}_set_voltage()

After `for (val = LDO_VOL_MIN_IDX; val <= LDO_VOL_MAX_IDX; val++)', if no break
occurs, val reaches LDO_VOL_MIN_IDX + 1, which is out of bounds for
ldo45_voltage_map[] and ldo123_voltage_map[].

Similarly BUCK_TARGET_VOL_MAX_IDX + 1 is out of bounds for buck_voltage_map[].

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/lp3971.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index 76d08c282f9c..4f33a0f4a179 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -183,7 +183,7 @@ static int lp3971_ldo_set_voltage(struct regulator_dev *dev,
 		if (vol_map[val] >= min_vol)
 			break;
 
-	if (vol_map[val] > max_vol)
+	if (val > LDO_VOL_MAX_IDX || vol_map[val] > max_vol)
 		return -EINVAL;
 
 	return lp3971_set_bits(lp3971, LP3971_LDO_VOL_CONTR_REG(ldo),
@@ -272,7 +272,7 @@ static int lp3971_dcdc_set_voltage(struct regulator_dev *dev,
 		if (vol_map[val] >= min_vol)
 			break;
 
-	if (vol_map[val] > max_vol)
+	if (val > BUCK_TARGET_VOL_MAX_IDX || vol_map[val] > max_vol)
 		return -EINVAL;
 
 	ret = lp3971_set_bits(lp3971, LP3971_BUCK_TARGET_VOL1_REG(buck),

From 22208ac586f2e456c49e927b90ded50e923b6aee Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 12 Feb 2010 08:17:58 -0800
Subject: [PATCH 440/640] [IA64] preserve personality flag bits across exec

In its <asm/elf.h> ia64 defines SET_PERSONALITY in a way that unconditionally
sets the personality of the current process to PER_LINUX, losing any flag bits
from the upper 3 bytes of current->personality.  This is wrong. Those bits are
intended to be inherited across exec (other code takes care of ensuring that
security sensitive bits like ADDR_NO_RANDOMIZE are not passed to unsuspecting
setuid/setgid applications).

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/include/asm/elf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
index e14108b19c09..4c41656ede87 100644
--- a/arch/ia64/include/asm/elf.h
+++ b/arch/ia64/include/asm/elf.h
@@ -201,7 +201,9 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
    relevant until we have real hardware to play with... */
 #define ELF_PLATFORM	NULL
 
-#define SET_PERSONALITY(ex)	set_personality(PER_LINUX)
+#define SET_PERSONALITY(ex)	\
+	set_personality((current->personality & ~PER_MASK) | PER_LINUX)
+
 #define elf_read_implies_exec(ex, executable_stack)					\
 	((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0)
 

From 22a8cdd60339d931d0dca54427712b2714e5ba8b Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@redhat.com>
Date: Fri, 12 Feb 2010 10:53:08 -0500
Subject: [PATCH 441/640] parisc: fix tracing of signals

Mike Frysinger pointed out that calling tracehook_signal_handler with
stepping=0 missed testing the thread flags, resulting in not calling
ptrace_notify. Fix this by testing if we're single stepping or branch
stepping and setting the flag accordingly.

Tested, seems to work.

Reported-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/parisc/kernel/signal.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index fb37ac52e46c..35c827e94e31 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -468,7 +468,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	tracehook_signal_handler(sig, info, ka, regs, 0);
+	tracehook_signal_handler(sig, info, ka, regs, 
+		test_thread_flag(TIF_SINGLESTEP) ||
+		test_thread_flag(TIF_BLOCKSTEP));
 
 	return 1;
 }

From d6d8bf549393484e906913f02fa3c9518a2819b6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 12 Feb 2010 18:17:06 +0100
Subject: [PATCH 442/640] ALSA: hda - use WARN_ON_ONCE() for zero-division
 detection

Replace the zero-division warning message with WARN_ON_ONCE() per the
advice by Linus.  This shouldn't happen, but if it happens, it's
possible that the bug happens often due to buggy IRQs.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index e767c3f395ab..3600e9cc9bc6 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1893,12 +1893,9 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev)
 
 	if (!bdl_pos_adj[chip->dev_index])
 		return 1; /* no delayed ack */
-	if (azx_dev->period_bytes == 0) {
-		printk(KERN_WARNING
-		       "hda-intel: Divide by zero was avoided "
-		       "in azx_dev->period_bytes.\n");
-		return 0;
-	}
+	if (WARN_ONCE(!azx_dev->period_bytes,
+		      "hda-intel: zero azx_dev->period_bytes"))
+		return 0; /* this shouldn't happen! */
 	if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2)
 		return 0; /* NG - it's below the period boundary */
 	return 1; /* OK, it's fine */

From 724e6d3fe8003c3f60bf404bf22e4e331327c596 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 12 Feb 2010 11:07:45 -0800
Subject: [PATCH 443/640] Linux 2.6.33-rc8

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f8e02e9491d0..12b1aa1103ee 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 33
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*

From fafaf31bf9f965d91462ee115e27ef6c262b74ea Mon Sep 17 00:00:00 2001
From: Shanyu Zhao <shanyu.zhao@intel.com>
Date: Thu, 11 Feb 2010 10:42:22 -0800
Subject: [PATCH 444/640] iwlwifi: fix AMSDU Rx after paged Rx patch

Previous patch "use paged Rx" broke AMSDU Rx functionality. If an AP
sends out A-MSDU packets the station will crash.  Fix it by linearizing
skbuff for AMSDU packet before handing it to mac80211 since mac80211
doesn't support paged skbuff.

This fixes http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2155

Reported-by: Norbert Preining <preining@logic.at>
Signed-off-by: Shanyu Zhao <shanyu.zhao@intel.com>
Acked-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-rx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c
index 6f36b6e79f5e..2dbce85404aa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rx.c
@@ -928,7 +928,10 @@ static void iwl_pass_packet_to_mac80211(struct iwl_priv *priv,
 	if (ieee80211_is_mgmt(fc) ||
 	    ieee80211_has_protected(fc) ||
 	    ieee80211_has_morefrags(fc) ||
-	    le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG)
+	    le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG ||
+	    (ieee80211_is_data_qos(fc) &&
+	     *ieee80211_get_qos_ctl(hdr) &
+	     IEEE80211_QOS_CONTROL_A_MSDU_PRESENT))
 		ret = skb_linearize(skb);
 	else
 		ret = __pskb_pull_tail(skb, min_t(u16, IWL_LINK_HDR_MAX, len)) ?

From c6b471e6454c0e1c6d756672841cbaeae7c949f8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 7 Feb 2010 17:26:30 +0000
Subject: [PATCH 445/640] inet: Remove bogus IGMPv3 report handling

Currently we treat IGMPv3 reports as if it were an IGMPv2/v1 report.
This is broken as IGMPv3 reports are formatted differently.  So we
end up suppressing a bogus multicast group (which should be harmless
as long as the leading reserved field is zero).

In fact, IGMPv3 does not allow membership report suppression so
we should simply ignore IGMPv3 membership reports as a host.

This patch does exactly that.  I kept the case statement for it
so people won't accidentally add it back thinking that we overlooked
this case.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 76c08402c933..a42f658e756a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -946,7 +946,6 @@ int igmp_rcv(struct sk_buff *skb)
 		break;
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
-	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		/* Is it our report looped back? */
 		if (skb_rtable(skb)->fl.iif == 0)
 			break;
@@ -960,6 +959,7 @@ int igmp_rcv(struct sk_buff *skb)
 		in_dev_put(in_dev);
 		return pim_rcv_v1(skb);
 #endif
+	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 	case IGMP_DVMRP:
 	case IGMP_TRACE:
 	case IGMP_HOST_LEAVE_MESSAGE:

From 5affcd6ba2036b59a4dee3f0576ae3584e92e4f1 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Fri, 12 Feb 2010 10:05:45 +0200
Subject: [PATCH 446/640] mac80211: fix handling of null-rate control in
 rate_control_get_rate

For hardware with IEEE80211_HW_HAS_RATE_CONTROL the rate controller is not
initialized. However, calling functions such as ieee80211_beacon_get result
in the rate_control_get_rate function getting called, which is accessing
(in this case uninitialized) rate control structures unconditionally.

Fix by exiting the function before setting the rates for HW with
IEEE80211_HW_HAS_RATE_CONTROL set. The initialization of the ieee80211_tx_info
struct is intentionally still executed.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Kalle Valo <kalle.valo@nokia.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index b9007f80cb92..12a2bff7dcdb 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -245,6 +245,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
 		info->control.rates[i].count = 1;
 	}
 
+	if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
+		return;
+
 	if (sta && sdata->force_unicast_rateidx > -1) {
 		info->control.rates[0].idx = sdata->force_unicast_rateidx;
 	} else {

From 232486e1e9f34889424b68ee6270440b554479a2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 12 Feb 2010 12:03:45 -0800
Subject: [PATCH 447/640] sparc64: Tighten checks in kstack_valid().

The kernel stack pointer is invalid if it is not 16-byte
aligned.

Based upon a report by Meelis Roos <mroos@linux.ee>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/kstack.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h
index 4248d969272f..5247283d1c03 100644
--- a/arch/sparc/kernel/kstack.h
+++ b/arch/sparc/kernel/kstack.h
@@ -11,6 +11,10 @@ static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
 {
 	unsigned long base = (unsigned long) tp;
 
+	/* Stack pointer must be 16-byte aligned.  */
+	if (sp & (16UL - 1))
+		return false;
+
 	if (sp >= (base + sizeof(struct thread_info)) &&
 	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
 		return true;

From 5e2a911cecc7e0fd89b1d2d001b7b89d47057ad6 Mon Sep 17 00:00:00 2001
From: Steve Hodgson <shodgson@solarflare.com>
Date: Fri, 12 Feb 2010 12:32:27 -0800
Subject: [PATCH 448/640] sfc: Fix SFE4002 initialisation

From: Steve Hodgson <shodgson@solarflare.com>

Commit 357d46a17e54c9a87e0e6ef3930ff4ab2d232b81 "sfc: QT202x: Remove
unreliable MMD check at initialisation" broke initialisation of the
SFE4002.  efx_mdio_reset_mmd() returns a positive value rather than 0
on success.  The above commit causes this value to be propagated up
by qt202x_reset_phy(), which is treated as a failure by its callers.
Change qt202x_reset_phy() to return 0 if successful.

The PCI layer treats >0 as "fail, but please call remove() anyway",
which means that unloading the driver would cause a crash.  Add a
WARN_ON() on the failure path of efx_pci_probe() to provide early
warning if there are any other cases where we do this.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sfc/efx.c        | 1 +
 drivers/net/sfc/qt202x_phy.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 103e8b0e2a0d..46997e177ee3 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -2284,6 +2284,7 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
  fail2:
 	efx_fini_struct(efx);
  fail1:
+	WARN_ON(rc > 0);
 	EFX_LOG(efx, "initialisation failed. rc=%d\n", rc);
 	free_netdev(net_dev);
 	return rc;
diff --git a/drivers/net/sfc/qt202x_phy.c b/drivers/net/sfc/qt202x_phy.c
index e0d13a451019..67eec7a6e487 100644
--- a/drivers/net/sfc/qt202x_phy.c
+++ b/drivers/net/sfc/qt202x_phy.c
@@ -320,7 +320,7 @@ static int qt202x_reset_phy(struct efx_nic *efx)
 
 	falcon_board(efx)->type->init_phy(efx);
 
-	return rc;
+	return 0;
 
  fail:
 	EFX_ERR(efx, "PHY reset timed out\n");

From 3f6fae9559225741c91f1320090b285da1413290 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 11 Feb 2010 07:43:00 +0000
Subject: [PATCH 449/640] Btrfs: btrfs_mark_extent_written uses the wrong slot

My test do: fallocate a big file and do write. The file is 512M, but
after file write is done btrfs-debug-tree shows:
item 6 key (257 EXTENT_DATA 0) itemoff 3516 itemsize 53
                extent data disk byte 1103101952 nr 536870912
                extent data offset 0 nr 399634432 ram 536870912
                extent compression 0
Looks like a regression introducted by
6c7d54ac87f338c479d9729e8392eca3f76e11e1, where we set wrong slot.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/file.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 413a30dafcda..a7fd9f3a750a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -720,13 +720,15 @@ again:
 					inode->i_ino, orig_offset);
 		BUG_ON(ret);
 	}
-	fi = btrfs_item_ptr(leaf, path->slots[0],
-			   struct btrfs_file_extent_item);
 	if (del_nr == 0) {
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+			   struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_mark_buffer_dirty(leaf);
 	} else {
+		fi = btrfs_item_ptr(leaf, del_slot - 1,
+			   struct btrfs_file_extent_item);
 		btrfs_set_file_extent_type(leaf, fi,
 					   BTRFS_FILE_EXTENT_REG);
 		btrfs_set_file_extent_num_bytes(leaf, fi,

From fa644298eb24ab05b32acf6cc0f2265b833280e1 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 15 Dec 2009 12:58:09 +0000
Subject: [PATCH 450/640] powerpc/85xx: Fix oops during MSI driver probe on
 MPC85xxMDS boards

MPC85xx chips report the wrong value in feature reporting register,
and that causes the following oops:

 Unable to handle kernel paging request for data at address 0x00000c00
 Faulting instruction address: 0xc0019294
 Oops: Kernel access of bad area, sig: 11 [#1]
 MPC8569 MDS
 Modules linked in:
 [...]
 NIP [c0019294] mpic_set_irq_type+0x2f0/0x368
 LR [c0019124] mpic_set_irq_type+0x180/0x368
 Call Trace:
 [ef851d60] [c0019124] mpic_set_irq_type+0x180/0x368 (unreliable)
 [ef851d90] [c007958c] __irq_set_trigger+0x44/0xd4
 [ef851db0] [c007b550] set_irq_type+0x40/0x7c
 [ef851dc0] [c0004a60] irq_create_of_mapping+0xb4/0x114
 [ef851df0] [c0004af0] irq_of_parse_and_map+0x30/0x40
 [ef851e20] [c0405678] fsl_of_msi_probe+0x1a0/0x328
 [ef851e60] [c02e6438] of_platform_device_probe+0x5c/0x84
 [...]

This is because mpic_alloc() assigns wrong values to
mpic->isu_{size,shift,mask}, and things eventually break when
_mpic_irq_read() is trying to use them.

This patch fixes the issue by enabling MPIC_BROKEN_FRR_NIRQS quirk.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/85xx/mpc85xx_mds.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 21f61b8c445b..cc29c0f5300d 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -338,7 +338,8 @@ static void __init mpc85xx_mds_pic_init(void)
 	}
 
 	mpic = mpic_alloc(np, r.start,
-			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN,
+			MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN |
+			MPIC_BROKEN_FRR_NIRQS,
 			0, 256, " OpenPIC  ");
 	BUG_ON(mpic == NULL);
 	of_node_put(np);

From d1d47ec6e62ab08d2ebb925fd9203abfad3adfbf Mon Sep 17 00:00:00 2001
From: Peter Tyser <ptyser@xes-inc.com>
Date: Fri, 18 Dec 2009 16:50:37 -0600
Subject: [PATCH 451/640] powerpc/85xx: Fix SMP when "cpu-release-addr" is in
 lowmem

Recent U-Boot commit 5ccd29c3679b3669b0bde5c501c1aa0f325a7acb caused
the "cpu-release-addr" device tree property to contain the physical RAM
location that secondary cores were spinning at.  Previously, the
"cpu-release-addr" property contained a value referencing the boot page
translation address range of 0xfffffxxx, which then indirectly accessed
RAM.

The "cpu-release-addr" is currently ioremapped and the secondary cores
kicked.  However, due to the recent change in "cpu-release-addr", it
sometimes points to a memory location in low memory that cannot be
ioremapped.  For example on a P2020-based board with 512MB of RAM the
following error occurs on bootup:

  <...>
  mpic: requesting IPIs ...
  __ioremap(): phys addr 0x1ffff000 is RAM lr c05df9a0
  Unable to handle kernel paging request for data at address 0x00000014
  Faulting instruction address: 0xc05df9b0
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2 P2020 RDB
  Modules linked in:
  <... eventual kernel panic>

Adding logic to conditionally ioremap or access memory directly resolves
the issue.

Signed-off-by: Peter Tyser <ptyser@xes-inc.com>
Signed-off-by: Nate Case <ncase@xes-inc.com>
Reported-by: Dipen Dudhat <B09055@freescale.com>
Tested-by: Dipen Dudhat <B09055@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/85xx/smp.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 04160a4cc699..a15f582300d8 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -46,6 +46,7 @@ smp_85xx_kick_cpu(int nr)
 	__iomem u32 *bptr_vaddr;
 	struct device_node *np;
 	int n = 0;
+	int ioremappable;
 
 	WARN_ON (nr < 0 || nr >= NR_CPUS);
 
@@ -59,21 +60,37 @@ smp_85xx_kick_cpu(int nr)
 		return;
 	}
 
+	/*
+	 * A secondary core could be in a spinloop in the bootpage
+	 * (0xfffff000), somewhere in highmem, or somewhere in lowmem.
+	 * The bootpage and highmem can be accessed via ioremap(), but
+	 * we need to directly access the spinloop if its in lowmem.
+	 */
+	ioremappable = *cpu_rel_addr > virt_to_phys(high_memory);
+
 	/* Map the spin table */
-	bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY);
+	if (ioremappable)
+		bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY);
+	else
+		bptr_vaddr = phys_to_virt(*cpu_rel_addr);
 
 	local_irq_save(flags);
 
 	out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr);
 	out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start));
 
+	if (!ioremappable)
+		flush_dcache_range((ulong)bptr_vaddr,
+				(ulong)(bptr_vaddr + SIZE_BOOT_ENTRY));
+
 	/* Wait a bit for the CPU to ack. */
 	while ((__secondary_hold_acknowledge != nr) && (++n < 1000))
 		mdelay(1);
 
 	local_irq_restore(flags);
 
-	iounmap(bptr_vaddr);
+	if (ioremappable)
+		iounmap(bptr_vaddr);
 
 	pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
 }

From a9bb18f36c8056f0712fb28c52c0f85d98438dfb Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 10 Feb 2010 17:23:47 +0100
Subject: [PATCH 452/640] tracing/kprobes: Fix probe parsing

Trying to add a probe like:

  echo p:myprobe 0x10000 > /sys/kernel/debug/tracing/kprobe_events

will fail since the wrong pointer is passed to strict_strtoul
when trying to convert the address to an unsigned long.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100210162346.GA6933@osiris.boeblingen.de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_kprobe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..50b1b8239806 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv)
 			return -EINVAL;
 		}
 		/* an address specified */
-		ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
+		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
 		if (ret) {
 			pr_info("Failed to parse address.\n");
 			return ret;

From 7f51a100bba517196ac4bdf29408d20ee1c771e8 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Mon, 8 Feb 2010 08:30:03 +0100
Subject: [PATCH 453/640] firewire: ohci: retransmit isochronous transmit
 packets on cycle loss

In isochronous transmit DMA descriptors, link the skip address pointer
back to the descriptor itself.  When a cycle is lost, the controller
will send the packet in the next cycle, instead of terminating the
entire DMA program.

There are two reasons for this:

* This behaviour is compatible with the old IEEE1394 stack.  Old
  applications would not expect the DMA program to stop in this case.

* Since the OHCI driver does not report any uncompleted packets, the
  context would stop silently; clients would not have any chance to
  detect and handle this error without a watchdog timer.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>

Pieter Palmers notes:

"The reason I added this retry behavior to the old stack is because some
cards now and then fail to send a packet (e.g. the o2micro card in my
dell laptop).  I couldn't figure out why exactly this happens, my best
guess is that the card cannot fetch the payload data on time.  This
happens much more frequently when sending large packets, which leads me
to suspect that there are some contention issues with the DMA that fills
the transmit FIFO.

In the old stack it was a pretty critical issue as it resulted in a
freeze of the userspace application.

The omission of a packet doesn't necessarily have to be an issue.  E.g.
in IEC61883 streams the DBC field can be used to detect discontinuities
in the stream.  So as long as the other side doesn't bail when no
[packet] is present in a cycle, there is not really a problem.

I'm not convinced though that retrying is the proper solution, but it is
simple and effective for what it had to do.  And I think there are no
reasons not to do it this way.  Userspace can still detect this by
checking the cycle the descriptor was sent in."

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (changelog, comment)
---
 drivers/firewire/ohci.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 2345d4103fe6..43ebf337b131 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2101,11 +2101,6 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base,
 	u32 payload_index, payload_end_index, next_page_index;
 	int page, end_page, i, length, offset;
 
-	/*
-	 * FIXME: Cycle lost behavior should be configurable: lose
-	 * packet, retransmit or terminate..
-	 */
-
 	p = packet;
 	payload_index = payload;
 
@@ -2135,6 +2130,14 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base,
 	if (!p->skip) {
 		d[0].control   = cpu_to_le16(DESCRIPTOR_KEY_IMMEDIATE);
 		d[0].req_count = cpu_to_le16(8);
+		/*
+		 * Link the skip address to this descriptor itself.  This causes
+		 * a context to skip a cycle whenever lost cycles or FIFO
+		 * overruns occur, without dropping the data.  The application
+		 * should then decide whether this is an error condition or not.
+		 * FIXME:  Make the context's cycle-lost behaviour configurable?
+		 */
+		d[0].branch_address = cpu_to_le32(d_bus | z);
 
 		header = (__le32 *) &d[1];
 		header[0] = cpu_to_le32(IT_HEADER_SY(p->sy) |

From 175359f89df39f4faed663c8cfd6ee0222d2fa1e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 11 Feb 2010 13:13:10 +0100
Subject: [PATCH 454/640] reiserfs: Fix softlockup while waiting on an inode

When we wait for an inode through reiserfs_iget(), we hold
the reiserfs lock. And waiting for an inode may imply waiting
for its writeback. But the inode writeback path may also require
the reiserfs lock, which leads to a deadlock.

We just need to release the reiserfs lock from reiserfs_iget()
to fix this.

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Christian Kujau <lists@nerdbynature.de>
Cc: Chris Mason <chris.mason@oracle.com>
---
 fs/reiserfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9087b10209e6..2df0f5c7c60b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1497,9 +1497,11 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
 
 	args.objectid = key->on_disk_key.k_objectid;
 	args.dirid = key->on_disk_key.k_dir_id;
+	reiserfs_write_unlock(s);
 	inode = iget5_locked(s, key->on_disk_key.k_objectid,
 			     reiserfs_find_actor, reiserfs_init_locked_inode,
 			     (void *)(&args));
+	reiserfs_write_lock(s);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 

From 775c67090c98780b44c4f95d5c05565df715b8bd Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Fri, 12 Feb 2010 00:17:59 +0100
Subject: [PATCH 455/640] drm: Fix a bug in the range manager.

When searching for free space in a range, the function could return a node extending outside of the given range.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index cdec32977129..2ac074c8f5d2 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -405,7 +405,8 @@ struct drm_mm_node *drm_mm_search_free_in_range(const struct drm_mm *mm,
 				wasted += alignment - tmp;
 		}
 
-		if (entry->size >= size + wasted) {
+		if (entry->size >= size + wasted &&
+		    (entry->start + wasted + size) <= end) {
 			if (!best_match)
 				return entry;
 			if (entry->size < best_size) {

From e22238ea37a870f70e34668a4992bde0c92bba8d Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Fri, 12 Feb 2010 00:18:00 +0100
Subject: [PATCH 456/640] drm/ttm: Fix a bug occuring when validating a buffer
 object in a range.

If the buffer object was already in the requested memory type, but
outside of the requested range it was never moved into the requested range.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1a3e909b7bba..c7320ce4567d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1020,6 +1020,12 @@ static int ttm_bo_mem_compat(struct ttm_placement *placement,
 			     struct ttm_mem_reg *mem)
 {
 	int i;
+	struct drm_mm_node *node = mem->mm_node;
+
+	if (node && placement->lpfn != 0 &&
+	    (node->start < placement->fpfn ||
+	     node->start + node->size > placement->lpfn))
+		return -1;
 
 	for (i = 0; i < placement->num_placement; i++) {
 		if ((placement->placement[i] & mem->placement &

From ce36f00d599e0f988c2a1b7b276d9184ee9c5d82 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 13 Feb 2010 20:20:19 -0500
Subject: [PATCH 457/640] drm/radeon/kms/atom: use get_unaligned_le32() for
 ctx->ps

Noticed on a DEC Alpha.

Start up into console mode caused 15 unaligned accesses, and starting X
caused another 48.

Signed-off-by: Matt Turner <mattst88@gmail.com>
CC: Jerome Glisse <jglisse@redhat.com>
CC: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/atom.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index e3b44562d265..2a3df5599ab4 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -24,6 +24,7 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <asm/unaligned.h>
 
 #define ATOM_DEBUG
 
@@ -212,7 +213,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
 	case ATOM_ARG_PS:
 		idx = U8(*ptr);
 		(*ptr)++;
-		val = le32_to_cpu(ctx->ps[idx]);
+		/* get_unaligned_le32 avoids unaligned accesses from atombios
+		 * tables, noticed on a DEC Alpha. */
+		val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
 		if (print)
 			DEBUG("PS[0x%02X,0x%04X]", idx, val);
 		break;

From 4b505db9c4c72dbd2a8e66b8d681640101325af6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 15 Feb 2010 14:17:45 +0900
Subject: [PATCH 458/640] sh64: fix tracing of signals.

This follows the parisc change to ensure that tracehook_signal_handler()
is aware of when we are single-stepping in order to ptrace_notify()
appropriately. While this was implemented for 32-bit SH, sh64 neglected
to make use of TIF_SINGLESTEP when it was folded in with the 32-bit code,
resulting in ptrace_notify() never being called.

As sh64 uses all of the other abstractions already, this simply plugs in
the thread flag in the appropriate enable/disable paths and fixes up the
tracehook notification accordingly. With this in place, sh64 is brought
in line with what 32-bit is already doing.

Reported-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/ptrace_64.c | 11 +++++++++--
 arch/sh/kernel/signal_64.c |  4 +++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 873ebdc4f98e..b063eb8b18e3 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -133,6 +133,8 @@ void user_enable_single_step(struct task_struct *child)
 	struct pt_regs *regs = child->thread.uregs;
 
 	regs->sr |= SR_SSTEP;	/* auto-resetting upon exception */
+
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 }
 
 void user_disable_single_step(struct task_struct *child)
@@ -140,6 +142,8 @@ void user_disable_single_step(struct task_struct *child)
 	struct pt_regs *regs = child->thread.uregs;
 
 	regs->sr &= ~SR_SSTEP;
+
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
 }
 
 static int genregs_get(struct task_struct *target,
@@ -454,6 +458,8 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
+	int step;
+
 	if (unlikely(current->audit_context))
 		audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]),
 				   regs->regs[9]);
@@ -461,8 +467,9 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[9]);
 
-	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		tracehook_report_syscall_exit(regs, 0);
+	step = test_thread_flag(TIF_SINGLESTEP);
+	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, step);
 }
 
 /* Called with interrupts disabled */
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index ce76dbdef294..580e97d46ca5 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -118,7 +118,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset)
 			 * clear the TS_RESTORE_SIGMASK flag.
 			 */
 			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-			tracehook_signal_handler(signr, &info, &ka, regs, 0);
+
+			tracehook_signal_handler(signr, &info, &ka, regs,
+					test_thread_flag(TIF_SINGLESTEP));
 			return 1;
 		}
 	}

From e803e8b2628f3e9a42f45c5b7bb1f9821b08352c Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 15 Feb 2010 15:24:48 +1000
Subject: [PATCH 459/640] drm/radeon/kms: make sure retry count increases.

In testing I've never seen it go past 1 retry anyways but better
safe than sorry.

Reported by Droste on irc.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/atombios_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index b32eeea5bb8b..99915a682d59 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -350,7 +350,7 @@ retry:
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 
 	if (args.ucReplyStatus && !args.ucDataOutLen) {
-		if (args.ucReplyStatus == 0x20 && retry_count < 10)
+		if (args.ucReplyStatus == 0x20 && retry_count++ < 10)
 			goto retry;
 		DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n",
 			  req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3],

From 0a27fcfaaf61108d94f0377f91bed81b2dd35f52 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 15 Feb 2010 17:05:28 +0100
Subject: [PATCH 460/640] ALSA: hda - Correct ASUA blacklist for MSI brokenness

The MSI blacklist entry for ASUS mobo added in the commit
8ce28d6abff34886d3797b25324c940471b99164 was based on the alsa-info
output wrongly posted.  Fix the id to the right one now.

Reported-by: Sid Boyce <sboyce@blueyonder.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 3600e9cc9bc6..ff6da6f386d1 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2350,7 +2350,7 @@ static void __devinit check_probe_mask(struct azx *chip, int dev)
  */
 static struct snd_pci_quirk msi_black_list[] __devinitdata = {
 	SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */
-	SND_PCI_QUIRK(0x1043, 0x829c, "ASUS", 0), /* nvidia */
+	SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */
 	{}
 };
 

From d9184fa97b6f48d399636e5e2669bc8419f9369e Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 16 Feb 2010 11:14:14 +1000
Subject: [PATCH 461/640] drm/nouveau: use mutex for vbios lock

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bios.c | 7 +++----
 drivers/gpu/drm/nouveau/nouveau_bios.h | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 2cd0fad17dac..0e9cd1d49130 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -5861,13 +5861,12 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvbios *bios = &dev_priv->VBIOS;
 	struct init_exec iexec = { true, false };
-	unsigned long flags;
 
-	spin_lock_irqsave(&bios->lock, flags);
+	mutex_lock(&bios->lock);
 	bios->display.output = dcbent;
 	parse_init_table(bios, table, &iexec);
 	bios->display.output = NULL;
-	spin_unlock_irqrestore(&bios->lock, flags);
+	mutex_unlock(&bios->lock);
 }
 
 static bool NVInitVBIOS(struct drm_device *dev)
@@ -5876,7 +5875,7 @@ static bool NVInitVBIOS(struct drm_device *dev)
 	struct nvbios *bios = &dev_priv->VBIOS;
 
 	memset(bios, 0, sizeof(struct nvbios));
-	spin_lock_init(&bios->lock);
+	mutex_init(&bios->lock);
 	bios->dev = dev;
 
 	if (!NVShadowVBIOS(dev, bios->data))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 68446fd4146b..fd94bd6dc264 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -205,7 +205,7 @@ struct nvbios {
 	struct drm_device *dev;
 	struct nouveau_bios_info pub;
 
-	spinlock_t lock;
+	struct mutex lock;
 
 	uint8_t data[NV_PROM_SIZE];
 	unsigned int length;

From bf929efa56ac174bf6d4f54cd6fe811181a51ae5 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 11 Feb 2010 12:47:40 +0100
Subject: [PATCH 462/640] drm/nouveau: Force TV encoder DPMS reinit after
 resume.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv17_tv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 58b917c3341b..21ac6e49b6ee 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -579,6 +579,8 @@ static void nv17_tv_restore(struct drm_encoder *encoder)
 				nouveau_encoder(encoder)->restore.output);
 
 	nv17_tv_state_load(dev, &to_tv_enc(encoder)->saved_state);
+
+	nouveau_encoder(encoder)->last_dpms = NV_DPMS_CLEARED;
 }
 
 static int nv17_tv_create_resources(struct drm_encoder *encoder,

From 65d269538a1129495ac45a14a777cd11cfe881d8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 15 Feb 2010 12:19:53 -0500
Subject: [PATCH 463/640] NFS: Too many GETATTR and ACCESS calls after direct
 I/O

The cached read and write paths initialize fattr->time_start in their
setup procedures.  The value of fattr->time_start is propagated to
read_cache_jiffies by nfs_update_inode().  Subsequent calls to
nfs_attribute_timeout() will then use a good time stamp when
computing the attribute cache timeout, and squelch unneeded GETATTR
calls.

Since the direct I/O paths erroneously leave the inode's
fattr->time_start field set to zero, read_cache_jiffies for that inode
is set to zero after any direct read or write operation.  This
triggers an otw GETATTR or ACCESS call to update the file's attribute
and access caches properly, even when the NFS READ or WRITE replies
have usable post-op attributes.

Make sure the direct read and write setup code performs the same fattr
initialization as the cached I/O paths to prevent unnecessary GETATTR
calls.

This was likely introduced by commit 0e574af1 in 2.6.15, which appears
to add new nfs_fattr_init() call sites in the cached read and write
paths, but not in the equivalent places in fs/nfs/direct.c.  A
subsequent commit in the same series, 33801147, introduces the
fattr->time_start field.

Interestingly, the direct write reschedule path already has a call to
nfs_fattr_init() in the right place.

Reported-by: Quentin Barnes <qbarnes@yahoo-inc.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@kernel.org
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/direct.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e1d415e97849..0d289823e856 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
 		data->res.count = bytes;
+		nfs_fattr_init(&data->fattr);
 		msg.rpc_argp = &data->args;
 		msg.rpc_resp = &data->res;
 
@@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(data->inode)->commit_setup(data, &msg);
 
@@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
 		data->res.verf = &data->verf;
+		nfs_fattr_init(&data->fattr);
 
 		task_setup_data.task = &data->task;
 		task_setup_data.callback_data = data;

From a626e8478b18de4fdee0e6d13975cea2b23efea5 Mon Sep 17 00:00:00 2001
From: Don Skidmore <donald.c.skidmore@intel.com>
Date: Thu, 11 Feb 2010 04:13:49 +0000
Subject: [PATCH 464/640] ixgbe: Fix - Do not allow Rx FC on 82598 at 1G due to
 errata

The 82598 has an erratum that receipt of pause frames at 1G
could lead to a Tx Hang.  To avoid this this patch disables
Rx FC while at 1G speed for all 82598 parts.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_82598.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_82598.c b/drivers/net/ixgbe/ixgbe_82598.c
index 3103f4165311..35a06b47587b 100644
--- a/drivers/net/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ixgbe/ixgbe_82598.c
@@ -357,12 +357,34 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num)
 	u32 fctrl_reg;
 	u32 rmcs_reg;
 	u32 reg;
+	u32 link_speed = 0;
+	bool link_up;
 
 #ifdef CONFIG_DCB
 	if (hw->fc.requested_mode == ixgbe_fc_pfc)
 		goto out;
 
 #endif /* CONFIG_DCB */
+	/*
+	 * On 82598 having Rx FC on causes resets while doing 1G
+	 * so if it's on turn it off once we know link_speed. For
+	 * more details see 82598 Specification update.
+	 */
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) {
+		switch (hw->fc.requested_mode) {
+		case ixgbe_fc_full:
+			hw->fc.requested_mode = ixgbe_fc_tx_pause;
+			break;
+		case ixgbe_fc_rx_pause:
+			hw->fc.requested_mode = ixgbe_fc_none;
+			break;
+		default:
+			/* no change */
+			break;
+		}
+	}
+
 	/* Negotiate the fc mode to use */
 	ret_val = ixgbe_fc_autoneg(hw);
 	if (ret_val)

From e86bff0edaa514a63ecd80e1ed2b3472b2507880 Mon Sep 17 00:00:00 2001
From: Don Skidmore <donald.c.skidmore@intel.com>
Date: Thu, 11 Feb 2010 04:14:08 +0000
Subject: [PATCH 465/640] ixgbe: fix WOL register setup for 82599

We need to have the WUS register set to all 1's in order for the hardware
to be capable of ever waking up.  Set it here in the ixgbe_probe().

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 7b7c8486c0bf..951b73cf5ca2 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5763,6 +5763,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_sw_init;
 
+	/* Make it possible the adapter to be woken up via WOL */
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB)
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+
 	/*
 	 * If there is a fan on this device and it has failed log the
 	 * failure.

From f04d5e012d73ea441bd39804ace39fd6d1ce5611 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 2 Feb 2010 14:37:58 -0800
Subject: [PATCH 466/640] thinkpad-acpi: wrong thermal attribute_group removed
 in thermal_exit()

sysfs_remove_group() removed the wrong attribute_group for
thermal_read_mode TPEC_8, ACPI_TMP07 and ACPI_UPDT

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Acked-by: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index e67e4feb35cb..eb603f1d55ca 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -5771,7 +5771,7 @@ static void thermal_exit(void)
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
 		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
-				   &thermal_temp_input16_group);
+				   &thermal_temp_input8_group);
 		break;
 	case TPACPI_THERMAL_NONE:
 	default:

From 97c169d39b6846a564dc8d883832e7fef9bdb77d Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 16 Feb 2010 03:30:06 -0500
Subject: [PATCH 467/640] ACPI: remove Asus P2B-DS from acpi=ht blacklist

We realized when we broke acpi=ht
http://bugzilla.kernel.org/show_bug.cgi?id=14886
that acpi=ht is not needed on this box
and folks have been using acpi=force on it anyway.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0acbcdfa5ca4..af1c5833ff23 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1342,14 +1342,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
 		     },
 	 },
-	{
-	 .callback = force_acpi_ht,
-	 .ident = "ASUS P2B-DS",
-	 .matches = {
-		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
-		     DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
-		     },
-	 },
 	{
 	 .callback = force_acpi_ht,
 	 .ident = "ASUS CUR-DLS",

From 49c6fb2e41d41c4c0c5c753b6960bc81fe658d20 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Mon, 1 Feb 2010 10:35:18 -0700
Subject: [PATCH 468/640] ACPI: dock: properly initialize local struct
 dock_station in dock_add()

Commit fe06fba2 (ACPI: dock: add struct dock_station * directly
to platform device data) changed dock_add() to use the
platform_device_register_data() API.

We passed that interface a stack variable, which is kmemdup'ed
and assigned to the device's platform_data pointer.

Unfortunately, whatever random garbage is in the stack variable
gets coped during the kmemdup, and that leads to broken behavior.

Explicitly zero out the structure before passing it to the API.

This fixes the T41 docking button issue:
http://bugzilla.kernel.org/show_bug.cgi?id=15000

Cc: stable@kernel.org
Reported-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/dock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index bbc2c1315c47..b2586f57e1f5 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -935,6 +935,7 @@ static int dock_add(acpi_handle handle)
 	struct platform_device *dd;
 
 	id = dock_station_count;
+	memset(&ds, 0, sizeof(ds));
 	dd = platform_device_register_data(NULL, "dock", id, &ds, sizeof(ds));
 	if (IS_ERR(dd))
 		return PTR_ERR(dd);

From 370d5cd88509b93b76eb2f5f97efbd71c25061cb Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 27 Jan 2010 15:25:39 -0800
Subject: [PATCH 469/640] ACPI: fix High cpu temperature with 2.6.32

Since the rewrite of the CPU idle governor in 2.6.32, two laptops have
surfaced where the BIOS advertises a C2 power state, but for some reason
this state is not functioning (as verified in both cases by powertop
before the patch in .32).

The old governor had the accidental behavior that if a non-working state
was chosen too many times, it would end up falling back to C1.  The new
governor works differently and this accidental behavior is no longer
there; the result is a high temperature on these two machines.

This patch adds these 2 machines to the DMI table for C state anomalies;
by just not using C2 both these machines are better off (the TSC can be
used instead of the pm timer, giving a performance boost for example).

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14742

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Reported-by: <akwatts@ymail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_idle.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 7c0441f63b39..e88e8ae04fdb 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -110,6 +110,14 @@ static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
 	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
 	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
 	 (void *)2},
+	{ set_max_cstate, "Pavilion zv5000", {
+	  DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+	  DMI_MATCH(DMI_PRODUCT_NAME,"Pavilion zv5000 (DS502A#ABA)")},
+	 (void *)1},
+	{ set_max_cstate, "Asus L8400B", {
+	  DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+	  DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
+	 (void *)1},
 	{},
 };
 

From 1379d2fef0ec07c7027a5e89036025ce761470c8 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Tue, 16 Feb 2010 04:16:55 -0500
Subject: [PATCH 470/640] ACPI, i915: blacklist Clevo M5x0N bad_lid state

Wrong Lid state reported.
Need to blacklist this machine for LVDS detection.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/gpu/drm/i915/intel_lvds.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index b1d0acbae4e4..c2e8a45780d5 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -636,6 +636,13 @@ static const struct dmi_system_id bad_lid_status[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "PC-81005"),
 		},
 	},
+	{
+		.ident = "Clevo M5x0N",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "CLEVO Co."),
+			DMI_MATCH(DMI_BOARD_NAME, "M5x0N"),
+		},
+	},
 	{ }
 };
 

From 1252f238db48ec419f40c1bdf30fda649860eed9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 16 Feb 2010 15:02:13 +0100
Subject: [PATCH 471/640] x86: set_personality_ia32() misses
 force_personality32

05d43ed8a "x86: get rid of the insane TIF_ABI_PENDING bit" forgot about
force_personality32.  Fix.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/process_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 41a26a82470a..126f0b493d04 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -527,6 +527,7 @@ void set_personality_ia32(void)
 
 	/* Make sure to be in 32bit mode */
 	set_thread_flag(TIF_IA32);
+	current->personality |= force_personality32;
 
 	/* Prepare the first "return" to user space */
 	current_thread_info()->status |= TS_COMPAT;

From 11557b24fdec13cb1c3d5f681688401a651ed54e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 16 Feb 2010 15:24:01 +0100
Subject: [PATCH 472/640] x86: ELF_PLAT_INIT() shouldn't worry about TIF_IA32

The 64-bit version of ELF_PLAT_INIT() clears TIF_IA32, but at this point
it has already been cleared by SET_PERSONALITY == set_personality_64bit.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/elf.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1994d3f58443..f2ad2163109d 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t,
 }
 
 #define ELF_PLAT_INIT(_r, load_addr)			\
-do {							\
-	elf_common_init(&current->thread, _r, 0);	\
-	clear_thread_flag(TIF_IA32);			\
-} while (0)
+	elf_common_init(&current->thread, _r, 0)
 
 #define	COMPAT_ELF_PLAT_INIT(regs, load_addr)		\
 	elf_common_init(&current->thread, regs, __USER_DS)

From 781248c1b50c776a9ef4be1130f84ced1cba42fe Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Tue, 16 Feb 2010 18:42:47 +0000
Subject: [PATCH 473/640] dm stripe: avoid divide by zero with invalid stripe
 count

If a table containing zero as stripe count is passed into stripe_ctr
the code attempts to divide by zero.

This patch changes DM_TABLE_LOAD to return -EINVAL if the stripe count
is zero.

We now get the following error messages:
  device-mapper: table: 253:0: striped: Invalid stripe count
  device-mapper: ioctl: error adding target to table

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Cc: stable@kernel.org
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-stripe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index e0efc1adcaff..bd58703ee8f6 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -110,7 +110,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	stripes = simple_strtoul(argv[0], &end, 10);
-	if (*end) {
+	if (!stripes || *end) {
 		ti->error = "Invalid stripe count";
 		return -EINVAL;
 	}

From 55f67f2dedec1e3049abc30b6d82b999a14cafb7 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 16 Feb 2010 18:42:51 +0000
Subject: [PATCH 474/640] dm snapshot: persistent annotate work_queue as on
 stack

chunk_io() declares its 'struct mdata_req' on the stack and then
initializes its 'struct work_struct' member.  Annotate the
initialization of this workqueue with INIT_WORK_ON_STACK to suppress a
debugobjects warning seen when CONFIG_DEBUG_OBJECTS_WORK is enabled.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap-persistent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 7d08879689ac..c097d8a4823d 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -254,7 +254,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
 	 * Issue the synchronous I/O from a different thread
 	 * to avoid generic_make_request recursion.
 	 */
-	INIT_WORK(&req.work, do_metadata);
+	INIT_WORK_ON_STACK(&req.work, do_metadata);
 	queue_work(ps->metadata_wq, &req.work);
 	flush_workqueue(ps->metadata_wq);
 

From ebfd32bba9b518d684009d9d21a56742337ca1b3 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Tue, 16 Feb 2010 18:42:53 +0000
Subject: [PATCH 475/640] dm log: userspace fix overhead_size calcuations

This patch fixes two bugs that revolve around the miscalculation and
misuse of the variable 'overhead_size'.  'overhead_size' is the size of
the various header structures used during communication.

The first bug is the use of 'sizeof' with the pointer of a structure
instead of the structure itself - resulting in the wrong size being
computed.  This is then used in a check to see if the payload
(data_size) would be to large for the preallocated structure.  Since the
bug produces a smaller value for the overhead, it was possible for the
structure to be breached.  (Although the current users of the code do
not currently send enough data to trigger this bug.)

The second bug is that the 'overhead_size' value is used to compute how
much of the preallocated space should be cleared before populating it
with fresh data.  This should have simply been 'sizeof(struct cn_msg)'
not overhead_size.  The fact that 'overhead_size' was computed
incorrectly made this problem "less bad" - leaving only a pointer's
worth of space at the end uncleared.  Thus, this bug was never producing
a bad result, but still needs to be fixed - especially now that the
value is computed correctly.

Cc: stable@kernel.org
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-log-userspace-transfer.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 54abf9e303b7..f1c8cae70b4b 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -172,11 +172,15 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
 {
 	int r = 0;
 	size_t dummy = 0;
-	int overhead_size =
-		sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg);
+	int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg);
 	struct dm_ulog_request *tfr = prealloced_ulog_tfr;
 	struct receiving_pkg pkg;
 
+	/*
+	 * Given the space needed to hold the 'struct cn_msg' and
+	 * 'struct dm_ulog_request' - do we have enough payload
+	 * space remaining?
+	 */
 	if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) {
 		DMINFO("Size of tfr exceeds preallocated size");
 		return -EINVAL;
@@ -191,7 +195,7 @@ resend:
 	 */
 	mutex_lock(&dm_ulog_lock);
 
-	memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
+	memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg));
 	memcpy(tfr->uuid, uuid, DM_UUID_LEN);
 	tfr->luid = luid;
 	tfr->seq = dm_ulog_seq++;

From 5528d17de1cf1462f285c40ccaf8e0d0e4c64dc0 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 16 Feb 2010 18:42:55 +0000
Subject: [PATCH 476/640] dm raid1: fail writes if errors are not handled and
 log fails

If the mirror log fails when the handle_errors option was not selected
and there is no remaining valid mirror leg, writes return success even
though they weren't actually written to any device.  This patch
completes them with EIO instead.

This code path is taken:
do_writes:
	bio_list_merge(&ms->failures, &sync);
do_failures:
	if (!get_valid_mirror(ms)) (false)
	else if (errors_handled(ms)) (false)
	else bio_endio(bio, 0);

The logic in do_failures is based on presuming that the write was already
tried: if it succeeded at least on one leg (without handle_errors) it
is reported as success.

Reference: https://bugzilla.redhat.com/show_bug.cgi?id=555197

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-raid1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ad779bd13aec..6c1046df81f6 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -724,7 +724,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 	/*
 	 * Dispatch io.
 	 */
-	if (unlikely(ms->log_failure)) {
+	if (unlikely(ms->log_failure) && errors_handled(ms)) {
 		spin_lock_irq(&ms->lock);
 		bio_list_merge(&ms->failures, &sync);
 		spin_unlock_irq(&ms->lock);

From 558569aa9d83e016295bac77d900342908d7fd85 Mon Sep 17 00:00:00 2001
From: Takahiro Yasui <tyasui@redhat.com>
Date: Tue, 16 Feb 2010 18:42:58 +0000
Subject: [PATCH 477/640] dm raid1: fix null pointer dereference in suspend

When suspending a failed mirror, bios are completed by mirror_end_io() and
__rh_lookup() in dm_rh_dec() returns NULL where a non-NULL return value is
required by design.  Fix this by not changing the state of the recovery failed
region from DM_RH_RECOVERING to DM_RH_NOSYNC in dm_rh_recovery_end().

Issue

On 2.6.33-rc1 kernel, I hit the bug when I suspended the failed
mirror by dmsetup command.

BUG: unable to handle kernel NULL pointer dereference at 00000020
IP: [<f94f38e2>] dm_rh_dec+0x35/0xa1 [dm_region_hash]
...
EIP: 0060:[<f94f38e2>] EFLAGS: 00010046 CPU: 0
EIP is at dm_rh_dec+0x35/0xa1 [dm_region_hash]
EAX: 00000286 EBX: 00000000 ECX: 00000286 EDX: 00000000
ESI: eff79eac EDI: eff79e80 EBP: f6915cd4 ESP: f6915cc4
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process dmsetup (pid: 2849, ti=f6914000 task=eff03e80 task.ti=f6914000)
 ...
Call Trace:
 [<f9530af6>] ? mirror_end_io+0x53/0x1b1 [dm_mirror]
 [<f9413104>] ? clone_endio+0x4d/0xa2 [dm_mod]
 [<f9530aa3>] ? mirror_end_io+0x0/0x1b1 [dm_mirror]
 [<f94130b7>] ? clone_endio+0x0/0xa2 [dm_mod]
 [<c02d6bcb>] ? bio_endio+0x28/0x2b
 [<f952f303>] ? hold_bio+0x2d/0x62 [dm_mirror]
 [<f952f942>] ? mirror_presuspend+0xeb/0xf7 [dm_mirror]
 [<c02aa3e2>] ? vmap_page_range+0xb/0xd
 [<f9414c8d>] ? suspend_targets+0x2d/0x3b [dm_mod]
 [<f9414ca9>] ? dm_table_presuspend_targets+0xe/0x10 [dm_mod]
 [<f941456f>] ? dm_suspend+0x4d/0x150 [dm_mod]
 [<f941767d>] ? dev_suspend+0x55/0x18a [dm_mod]
 [<c0343762>] ? _copy_from_user+0x42/0x56
 [<f9417fb0>] ? dm_ctl_ioctl+0x22c/0x281 [dm_mod]
 [<f9417628>] ? dev_suspend+0x0/0x18a [dm_mod]
 [<f9417d84>] ? dm_ctl_ioctl+0x0/0x281 [dm_mod]
 [<c02c3c4b>] ? vfs_ioctl+0x22/0x85
 [<c02c422c>] ? do_vfs_ioctl+0x4cb/0x516
 [<c02c42b7>] ? sys_ioctl+0x40/0x5a
 [<c0202858>] ? sysenter_do_call+0x12/0x28

Analysis

When recovery process of a region failed, dm_rh_recovery_end() function
changes the state of the region from RM_RH_RECOVERING to DM_RH_NOSYNC.
When recovery_complete() is executed between dm_rh_update_states() and
dm_writes() in do_mirror(), bios are processed with the region state,
DM_RH_NOSYNC. However, the region data is freed without checking its
pending count when dm_rh_update_states() is called next time.

When bios are finished by mirror_end_io(), __rh_lookup() in dm_rh_dec()
returns NULL even though a valid return value are expected.

Solution

Remove the state change of the recovery failed region from DM_RH_RECOVERING
to DM_RH_NOSYNC in dm_rh_recovery_end(). We can remove the state change
because:

  - If the region data has been released by dm_rh_update_states(),
    a new region data is created with the state of DM_RH_NOSYNC, and
    bios are processed according to the DM_RH_NOSYNC state.

  - If the region data has not been released by dm_rh_update_states(),
    a state of the region is DM_RH_RECOVERING and bios are put in the
    delayed_bio list.

The flag change from DM_RH_RECOVERING to DM_RH_NOSYNC in dm_rh_recovery_end()
was added in the following commit:
  dm raid1: handle resync failures
  author  Jonathan Brassow <jbrassow@redhat.com>
    Thu, 12 Jul 2007 16:29:04 +0000 (17:29 +0100)
  http://git.kernel.org/linus/f44db678edcc6f4c2779ac43f63f0b9dfa28b724

Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-region-hash.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 5f19ceb6fe91..168bd38f5006 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -660,10 +660,9 @@ void dm_rh_recovery_end(struct dm_region *reg, int success)
 	spin_lock_irq(&rh->region_lock);
 	if (success)
 		list_add(&reg->list, &reg->rh->recovered_regions);
-	else {
-		reg->state = DM_RH_NOSYNC;
+	else
 		list_add(&reg->list, &reg->rh->failed_recovered_regions);
-	}
+
 	spin_unlock_irq(&rh->region_lock);
 
 	rh->wakeup_workers(rh->context);

From 9eef87da2a8ea4920e0d913ff977cac064b68ee0 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Tue, 16 Feb 2010 18:43:01 +0000
Subject: [PATCH 478/640] dm mpath: fix stall when requeueing io

This patch fixes the problem that system may stall if target's ->map_rq
returns DM_MAPIO_REQUEUE in map_request().
E.g. stall happens on 1 CPU box when a dm-mpath device with queue_if_no_path
     bounces between all-paths-down and paths-up on I/O load.

When target's ->map_rq returns DM_MAPIO_REQUEUE, map_request() requeues
the request and returns to dm_request_fn().  Then, dm_request_fn()
doesn't exit the I/O dispatching loop and continues processing
the requeued request again.
This map and requeue loop can be done with interrupt disabled,
so 1 CPU system can be stalled if this situation happens.

For example, commands below can stall my 1 CPU box within 1 minute or so:
  # dmsetup table mp
  mp: 0 2097152 multipath 1 queue_if_no_path 0 1 1 service-time 0 1 2 8:144 1 1
  # while true; do dd if=/dev/mapper/mp of=/dev/null bs=1M count=100; done &
  # while true; do \
  > dmsetup message mp 0 "fail_path 8:144" \
  > dmsetup suspend --noflush mp \
  > dmsetup resume mp \
  > dmsetup message mp 0 "reinstate_path 8:144" \
  > done

To fix the problem above, this patch changes dm_request_fn() to exit
the I/O dispatching loop once if a request is requeued in map_request().

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: stable@kernel.org
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3167480b532c..aa4e2aa86d49 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1595,10 +1595,15 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 	return BLKPREP_OK;
 }
 
-static void map_request(struct dm_target *ti, struct request *clone,
-			struct mapped_device *md)
+/*
+ * Returns:
+ * 0  : the request has been processed (not requeued)
+ * !0 : the request has been requeued
+ */
+static int map_request(struct dm_target *ti, struct request *clone,
+		       struct mapped_device *md)
 {
-	int r;
+	int r, requeued = 0;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	/*
@@ -1625,6 +1630,7 @@ static void map_request(struct dm_target *ti, struct request *clone,
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
 		dm_requeue_unmapped_request(clone);
+		requeued = 1;
 		break;
 	default:
 		if (r > 0) {
@@ -1636,6 +1642,8 @@ static void map_request(struct dm_target *ti, struct request *clone,
 		dm_kill_unmapped_request(clone, r);
 		break;
 	}
+
+	return requeued;
 }
 
 /*
@@ -1677,12 +1685,17 @@ static void dm_request_fn(struct request_queue *q)
 		atomic_inc(&md->pending[rq_data_dir(clone)]);
 
 		spin_unlock(q->queue_lock);
-		map_request(ti, clone, md);
+		if (map_request(ti, clone, md))
+			goto requeued;
+
 		spin_lock_irq(q->queue_lock);
 	}
 
 	goto out;
 
+requeued:
+	spin_lock_irq(q->queue_lock);
+
 plug_and_out:
 	if (!elv_queue_empty(q))
 		/* Some requests still remain, retry later */

From 9307f6b19ac4f5887552b5b2992f391b866f7633 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Tue, 16 Feb 2010 18:43:04 +0000
Subject: [PATCH 479/640] dm: sysfs revert add empty release function to avoid
 debug warning

Revert commit d2bb7df8cac647b92f51fb84ae735771e7adbfa7 at Greg's request.

    Author: Milan Broz <mbroz@redhat.com>
    Date:   Thu Dec 10 23:51:53 2009 +0000

    dm: sysfs add empty release function to avoid debug warning

    This patch just removes an unnecessary warning:
     kobject: 'dm': does not have a release() function,
     it is broken and must be fixed.

    The kobject is embedded in mapped device struct, so
    code does not need to release memory explicitly here.

Cc: Greg KH <gregkh@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-sysfs.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index f53392df7b97..f91b40942e07 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -79,13 +79,6 @@ static struct sysfs_ops dm_sysfs_ops = {
 	.show	= dm_attr_show,
 };
 
-/*
- * The sysfs structure is embedded in md struct, nothing to do here
- */
-static void dm_sysfs_release(struct kobject *kobj)
-{
-}
-
 /*
  * dm kobject is embedded in mapped_device structure
  * no need to define release function here
@@ -93,7 +86,6 @@ static void dm_sysfs_release(struct kobject *kobj)
 static struct kobj_type dm_ktype = {
 	.sysfs_ops	= &dm_sysfs_ops,
 	.default_attrs	= dm_attrs,
-	.release	= dm_sysfs_release
 };
 
 /*

From 1cab819b5e244e1b853c7b440981e6a960da3bfb Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 11 Feb 2010 13:48:29 +0000
Subject: [PATCH 480/640] ethtool: allow non-admin user to read GRO settings.

Looks like an oversight in GRO design.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d8aee584e8d1..236a9988ea91 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -927,6 +927,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPERMADDR:
 	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
+	case ETHTOOL_GGRO:
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
 	case ETHTOOL_GRXFH:

From 10e7454ed7a2da39f1f6255f63d7df27ab4bb67f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Feb 2010 19:24:30 +0000
Subject: [PATCH 481/640] ipcomp: Avoid duplicate calls to ipcomp_destroy

When ipcomp_tunnel_attach fails we will call ipcomp_destroy twice.
This may lead to double-frees on certain structures.

As there is no reason to explicitly call ipcomp_destroy, this patch
removes it from ipcomp*.c and lets the standard xfrm_state destruction
take place.

This is based on the discovery and patch by Alexey Dobriyan.

Tested-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipcomp.c  | 6 +-----
 net/ipv6/ipcomp6.c | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 38fbf04150ae..544ce0876f12 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -124,16 +124,12 @@ static int ipcomp4_init_state(struct xfrm_state *x)
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
-			goto error_tunnel;
+			goto out;
 	}
 
 	err = 0;
 out:
 	return err;
-
-error_tunnel:
-	ipcomp_destroy(x);
-	goto out;
 }
 
 static const struct xfrm_type ipcomp_type = {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 2f2a5ca2c878..002e6eef9120 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -154,16 +154,12 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
-			goto error_tunnel;
+			goto out;
 	}
 
 	err = 0;
 out:
 	return err;
-error_tunnel:
-	ipcomp_destroy(x);
-
-	goto out;
 }
 
 static const struct xfrm_type ipcomp6_type =

From 553f9118abc4fc53674fff87f6fe5fa3f56a41ed Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Feb 2010 20:00:51 +0000
Subject: [PATCH 482/640] xfrm: Fix xfrm_state_clone leak

xfrm_state_clone calls kfree instead of xfrm_state_put to free
a failed state.  Depending on the state of the failed state, it
can cause leaks to things like module references.

All states should be freed by xfrm_state_put past the point of
xfrm_init_state.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b36cc344474b..f445ea1c5f52 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1102,7 +1102,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 	int err = -ENOMEM;
 	struct xfrm_state *x = xfrm_state_alloc(net);
 	if (!x)
-		goto error;
+		goto out;
 
 	memcpy(&x->id, &orig->id, sizeof(x->id));
 	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
@@ -1160,16 +1160,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 	return x;
 
  error:
+	xfrm_state_put(x);
+out:
 	if (errp)
 		*errp = err;
-	if (x) {
-		kfree(x->aalg);
-		kfree(x->ealg);
-		kfree(x->calg);
-		kfree(x->encap);
-		kfree(x->coaddr);
-	}
-	kfree(x);
 	return NULL;
 }
 

From 07793d33b4fba00f5bd1dac78fa038bb0e23fa5c Mon Sep 17 00:00:00 2001
From: Ajit Khaparde <ajitkhaparde@gmail.com>
Date: Tue, 16 Feb 2010 00:18:46 +0000
Subject: [PATCH 483/640] be2net: set proper value to version field in req hdr

Before sending a command to the ASIC, set version properly.
This is necessary for the ARM firmware to send correct data to the driver.
This also fixes a bug in certain skews of the ASIC where the statistics
are misreported.

Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/benet/be_cmds.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index fee6eee7ae5b..006cb2efcd22 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -296,6 +296,7 @@ static void be_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr,
 	req_hdr->opcode = opcode;
 	req_hdr->subsystem = subsystem;
 	req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr));
+	req_hdr->version = 0;
 }
 
 static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages,

From d4a4683ca054ed9917dfc9e3ff0f7ecf74ad90d6 Mon Sep 17 00:00:00 2001
From: Greg KH <greg@kroah.com>
Date: Mon, 15 Feb 2010 09:37:46 -0800
Subject: [PATCH 484/640] USB: usbfs: only copy the actual data received

We need to only copy the data received by the device to userspace, not
the whole kernel buffer, which can contain "stale" data.

Thanks to Marcus Meissner for pointing this out and testing the fix.

Reported-by: Marcus Meissner <meissner@suse.de>
Tested-by: Marcus Meissner <meissner@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/devio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 6e8bcdfd23b4..ca948bbc388f 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1312,9 +1312,9 @@ static int processcompl(struct async *as, void __user * __user *arg)
 	void __user *addr = as->userurb;
 	unsigned int i;
 
-	if (as->userbuffer)
+	if (as->userbuffer && urb->actual_length)
 		if (copy_to_user(as->userbuffer, urb->transfer_buffer,
-				 urb->transfer_buffer_length))
+				 urb->actual_length))
 			goto err_out;
 	if (put_user(as->status, &userurb->status))
 		goto err_out;
@@ -1475,9 +1475,9 @@ static int processcompl_compat(struct async *as, void __user * __user *arg)
 	void __user *addr = as->userurb;
 	unsigned int i;
 
-	if (as->userbuffer)
+	if (as->userbuffer && urb->actual_length)
 		if (copy_to_user(as->userbuffer, urb->transfer_buffer,
-				 urb->transfer_buffer_length))
+				 urb->actual_length))
 			return -EFAULT;
 	if (put_user(as->status, &userurb->status))
 		return -EFAULT;

From ddeee0b2eec2a51b0712b04de4b39e7bec892a53 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 16 Feb 2010 12:35:07 -0800
Subject: [PATCH 485/640] USB: usbfs: properly clean up the as structure on
 error paths

I notice that the processcompl_compat() function seems to be leaking the
'struct async *as' in the error paths.

I think that the calling convention is fundamentally buggered. The
caller is the one that did the "reap_as()" to get the as thing, the
caller should be the one to free it too.

Freeing it in the caller also means that it very clearly always gets
freed, and avoids the need for any "free in the error case too".

From: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Marcus Meissner <meissner@suse.de>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/devio.c | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index ca948bbc388f..a678186f218f 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1334,14 +1334,11 @@ static int processcompl(struct async *as, void __user * __user *arg)
 		}
 	}
 
-	free_async(as);
-
 	if (put_user(addr, (void __user * __user *)arg))
 		return -EFAULT;
 	return 0;
 
 err_out:
-	free_async(as);
 	return -EFAULT;
 }
 
@@ -1371,8 +1368,11 @@ static struct async *reap_as(struct dev_state *ps)
 static int proc_reapurb(struct dev_state *ps, void __user *arg)
 {
 	struct async *as = reap_as(ps);
-	if (as)
-		return processcompl(as, (void __user * __user *)arg);
+	if (as) {
+		int retval = processcompl(as, (void __user * __user *)arg);
+		free_async(as);
+		return retval;
+	}
 	if (signal_pending(current))
 		return -EINTR;
 	return -EIO;
@@ -1380,11 +1380,16 @@ static int proc_reapurb(struct dev_state *ps, void __user *arg)
 
 static int proc_reapurbnonblock(struct dev_state *ps, void __user *arg)
 {
+	int retval;
 	struct async *as;
 
-	if (!(as = async_getcompleted(ps)))
-		return -EAGAIN;
-	return processcompl(as, (void __user * __user *)arg);
+	as = async_getcompleted(ps);
+	retval = -EAGAIN;
+	if (as) {
+		retval = processcompl(as, (void __user * __user *)arg);
+		free_async(as);
+	}
+	return retval;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1497,7 +1502,6 @@ static int processcompl_compat(struct async *as, void __user * __user *arg)
 		}
 	}
 
-	free_async(as);
 	if (put_user(ptr_to_compat(addr), (u32 __user *)arg))
 		return -EFAULT;
 	return 0;
@@ -1506,8 +1510,11 @@ static int processcompl_compat(struct async *as, void __user * __user *arg)
 static int proc_reapurb_compat(struct dev_state *ps, void __user *arg)
 {
 	struct async *as = reap_as(ps);
-	if (as)
-		return processcompl_compat(as, (void __user * __user *)arg);
+	if (as) {
+		int retval = processcompl_compat(as, (void __user * __user *)arg);
+		free_async(as);
+		return retval;
+	}
 	if (signal_pending(current))
 		return -EINTR;
 	return -EIO;
@@ -1515,11 +1522,16 @@ static int proc_reapurb_compat(struct dev_state *ps, void __user *arg)
 
 static int proc_reapurbnonblock_compat(struct dev_state *ps, void __user *arg)
 {
+	int retval;
 	struct async *as;
 
-	if (!(as = async_getcompleted(ps)))
-		return -EAGAIN;
-	return processcompl_compat(as, (void __user * __user *)arg);
+	retval = -EAGAIN;
+	as = async_getcompleted(ps);
+	if (as) {
+		retval = processcompl_compat(as, (void __user * __user *)arg);
+		free_async(as);
+	}
+	return retval;
 }
 
 
From a7787e508acb4378d62f4584bae3dd1cd0ba3eac Mon Sep 17 00:00:00 2001
From: Radek Liboska <liboska@uochb.cas.cz>
Date: Wed, 27 Jan 2010 15:38:34 +0100
Subject: [PATCH 486/640] USB: ftdi_sio: new device id for papouch AD4USB

added new device pid (PAPOUCH_AD4USB_PID) to ftdi_sio.h and ftdi_sio.c

AD4USB measuring converter is a 4-input A/D converter which enables the
user to measure to four current inputs ranging from 0(4) to 20 mA or
voltage between 0 and 10 V. The measured values are then transferred to
a superior system in digital form. The AD4USB communicates via USB.
Powered is also via USB.  datasheet in english is here:
http://www.papouch.com/shop/scripts/pdf/ad4usb_en.pdf


Signed-off-by: Radek Liboska <liboska@uochb.cas.cz>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 216f187582ab..f6d85660a446 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -697,6 +697,7 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) },
 	{ USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) },
 	{ USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO4x4_PID) },
+	{ USB_DEVICE(PAPOUCH_VID, PAPOUCH_AD4USB_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DGQG_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) },
 	{ USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index da92b4952ffb..2478130672ef 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -968,6 +968,7 @@
 #define PAPOUCH_VID			0x5050	/* Vendor ID */
 #define PAPOUCH_TMU_PID			0x0400	/* TMU USB Thermometer */
 #define PAPOUCH_QUIDO4x4_PID		0x0900	/* Quido 4/4 Module */
+#define PAPOUCH_AD4USB_PID		0x8003	/* AD4USB Measurement Module */
 
 /*
  * Marvell SheevaPlug

From 39232b3d8046eace9985fd898b763c585f989099 Mon Sep 17 00:00:00 2001
From: Phil Dibowitz <phil@ipom.com>
Date: Sat, 16 Jan 2010 19:52:17 +0100
Subject: [PATCH 487/640] USB: storage: Remove unneeded SC/PR from
 unusual_devs.h

This patch removes the subclass and protocol entries from a Microtech
entry in unusual_devs.h. This was reported by <ryck@pacbell.net>.

Greg, please apply.

Signed-off-by: Phil Dibowitz <phil@ipom.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/storage/unusual_devs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index c932f9053188..49575fba3756 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -941,7 +941,7 @@ UNUSUAL_DEV(  0x07ab, 0xfccd, 0x0000, 0x9999,
 UNUSUAL_DEV(  0x07af, 0x0004, 0x0100, 0x0133,
 		"Microtech",
 		"USB-SCSI-DB25",
-		US_SC_SCSI, US_PR_BULK, usb_stor_euscsi_init,
+		US_SC_DEVICE, US_PR_DEVICE, usb_stor_euscsi_init,
 		US_FL_SCM_MULT_TARG ), 
 
 UNUSUAL_DEV(  0x07af, 0x0005, 0x0100, 0x0100,

From 65e1ec6751b3eefee6d94161185e78736366126f Mon Sep 17 00:00:00 2001
From: Andreas Mohr <andi@lisas.de>
Date: Sun, 17 Jan 2010 11:45:38 +0100
Subject: [PATCH 488/640] USB: ftdi_sio: add device IDs (several ELV, one
 Mindstorms NXT)

- add FTDI device IDs for several ELV devices and NXTCam of Lego Mindstorms NXT
- add hopefully helpful new_id comment
- remove less helpful "Due to many user requests for multiple ELV devices we enable
  them by default." comment (we simply add _all_ known devices - an
  enduser shouldn't have to fiddle with obscure module parameters...).
- add myself to DRIVER_AUTHOR

The missing NXTCam ID has been found at
http://www.unixboard.de/vb3/showthread.php?t=44155
, ELV devices taken from ELV Windows .inf file.

Signed-off-by: Andreas Mohr <andi@lisas.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/ftdi_sio.c     | 24 +++++++++++++++++++++---
 drivers/usb/serial/ftdi_sio_ids.h | 17 +++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index f6d85660a446..7638828e7317 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -50,7 +50,7 @@
  * Version Information
  */
 #define DRIVER_VERSION "v1.5.0"
-#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>"
+#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr"
 #define DRIVER_DESC "USB FTDI Serial Converters Driver"
 
 static int debug;
@@ -145,10 +145,15 @@ static struct ftdi_sio_quirk ftdi_HE_TIRA1_quirk = {
 
 
+/*
+ * Device ID not listed? Test via module params product/vendor or
+ * /sys/bus/usb/ftdi_sio/new_id, then send patch/report!
+ */
 static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) },
@@ -552,9 +557,16 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) },
 	/*
-	 * Due to many user requests for multiple ELV devices we enable
-	 * them by default.
+	 * ELV devices:
 	 */
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_USR_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_MSM1_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_KL100_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS550_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_EC3000_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS888_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_TWS550_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_FEM_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_CLI7000_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_PPS7330_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_TFM100_PID) },
@@ -571,11 +583,17 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_PCK100_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_RFP500_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_FS20SIG_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_UTP8_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS300PC_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS444PC_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1300PC_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_EM1010PC_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS500_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_HS485_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_UMS100_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 2478130672ef..c8951aeed983 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -38,6 +38,8 @@
 /* www.candapter.com Ewert Energy Systems CANdapter device */
 #define FTDI_CANDAPTER_PID 0x9F80 /* Product Id */
 
+#define FTDI_NXTCAM_PID		0xABB8 /* NXTCam for Mindstorms NXT */
+
 /* OOCDlink by Joern Kaipf <joernk@web.de>
  * (http://www.joernonline.de/dw/doku.php?id=start&idx=projects:oocdlink) */
 #define FTDI_OOCDLINK_PID	0xbaf8	/* Amontec JTAGkey */
@@ -161,22 +163,37 @@
 /*
  * ELV USB devices submitted by Christian Abt of ELV (www.elv.de).
  * All of these devices use FTDI's vendor ID (0x0403).
+ * Further IDs taken from ELV Windows .inf file.
  *
  * The previously included PID for the UO 100 module was incorrect.
  * In fact, that PID was for ELV's UR 100 USB-RS232 converter (0xFB58).
  *
  * Armin Laeuger originally sent the PID for the UM 100 module.
  */
+#define FTDI_ELV_USR_PID	0xE000	/* ELV Universal-Sound-Recorder */
+#define FTDI_ELV_MSM1_PID	0xE001	/* ELV Mini-Sound-Modul */
+#define FTDI_ELV_KL100_PID	0xE002	/* ELV Kfz-Leistungsmesser KL 100 */
+#define FTDI_ELV_WS550_PID	0xE004	/* WS 550 */
+#define FTDI_ELV_EC3000_PID	0xE006	/* ENERGY CONTROL 3000 USB */
+#define FTDI_ELV_WS888_PID	0xE008	/* WS 888 */
+#define FTDI_ELV_TWS550_PID	0xE009	/* Technoline WS 550 */
+#define FTDI_ELV_FEM_PID	0xE00A	/* Funk Energie Monitor */
 #define FTDI_ELV_FHZ1300PC_PID	0xE0E8	/* FHZ 1300 PC */
 #define FTDI_ELV_WS500_PID	0xE0E9	/* PC-Wetterstation (WS 500) */
 #define FTDI_ELV_HS485_PID	0xE0EA	/* USB to RS-485 adapter */
+#define FTDI_ELV_UMS100_PID	0xE0EB	/* ELV USB Master-Slave Schaltsteckdose UMS 100 */
+#define FTDI_ELV_TFD128_PID	0xE0EC	/* ELV Temperatur-Feuchte-Datenlogger TFD 128 */
+#define FTDI_ELV_FM3RX_PID	0xE0ED	/* ELV Messwertuebertragung FM3 RX */
+#define FTDI_ELV_WS777_PID	0xE0EE	/* Conrad WS 777 */
 #define FTDI_ELV_EM1010PC_PID	0xE0EF	/* Engery monitor EM 1010 PC */
 #define FTDI_ELV_CSI8_PID	0xE0F0	/* Computer-Schalt-Interface (CSI 8) */
 #define FTDI_ELV_EM1000DL_PID	0xE0F1	/* PC-Datenlogger fuer Energiemonitor (EM 1000 DL) */
 #define FTDI_ELV_PCK100_PID	0xE0F2	/* PC-Kabeltester (PCK 100) */
 #define FTDI_ELV_RFP500_PID	0xE0F3	/* HF-Leistungsmesser (RFP 500) */
 #define FTDI_ELV_FS20SIG_PID	0xE0F4	/* Signalgeber (FS 20 SIG) */
+#define FTDI_ELV_UTP8_PID	0xE0F5	/* ELV UTP 8 */
 #define FTDI_ELV_WS300PC_PID	0xE0F6	/* PC-Wetterstation (WS 300 PC) */
+#define FTDI_ELV_WS444PC_PID	0xE0F7	/* Conrad WS 444 PC */
 #define FTDI_PHI_FISCO_PID      0xE40B  /* PHI Fisco USB to Serial cable */
 #define FTDI_ELV_UAD8_PID	0xF068	/* USB-AD-Wandler (UAD 8) */
 #define FTDI_ELV_UDA7_PID	0xF069	/* USB-DA-Wandler (UDA 7) */

From ae3a07924f0a31b96d52bf16bdf1713445a5a414 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 18 Jan 2010 12:03:18 +0000
Subject: [PATCH 489/640] USB: r8a66597-udc: Prototype IS_ERR() and PTR_ERR()

The build of r8a66597-udc was failing on ARM since IS_ERR() and
PTR_ERR() weren't protyped.  Presumably err.h is being pulled in by
another header on other platforms.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/r8a66597-udc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/r8a66597-udc.c b/drivers/usb/gadget/r8a66597-udc.c
index e220fb8091a3..8b45145b9136 100644
--- a/drivers/usb/gadget/r8a66597-udc.c
+++ b/drivers/usb/gadget/r8a66597-udc.c
@@ -26,6 +26,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/err.h>
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>

From 6feb63b69f4f6e876ea5a2edc6119b8e7ac90102 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 18 Jan 2010 13:18:34 +0000
Subject: [PATCH 490/640] USB: s3c-hsotg: Export usb_gadget_register_driver()

USB gadget controller drivers normally export their driver registration
function, allowing modular builds of the individual gadget drivers so
do so for s3c-hsotg, fixing builds.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/s3c-hsotg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c
index 4b5dbd0127f5..5fc80a104150 100644
--- a/drivers/usb/gadget/s3c-hsotg.c
+++ b/drivers/usb/gadget/s3c-hsotg.c
@@ -2582,6 +2582,7 @@ err:
 	hsotg->gadget.dev.driver = NULL;
 	return ret;
 }
+EXPORT_SYMBOL(usb_gadget_register_driver);
 
 int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 {

From b9df794258de24d10b0616634d4c30d8b6e9d381 Mon Sep 17 00:00:00 2001
From: Alek Du <alek.du@intel.com>
Date: Tue, 19 Jan 2010 16:31:31 +0800
Subject: [PATCH 491/640] USB: ehci: phy low power mode bug fixing

1. There are two msleep calls inside two spin lock sections, need to unlock
   and lock again after msleep.
2. Save a extra status reg setting.

Signed-off-by: Alek Du <alek.du@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/ehci-hub.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index c75d9270c752..19372673bf09 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -196,7 +196,9 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 			if (hostpc_reg) {
 				u32	t3;
 
+				spin_unlock_irq(&ehci->lock);
 				msleep(5);/* 5ms for HCD enter low pwr mode */
+				spin_lock_irq(&ehci->lock);
 				t3 = ehci_readl(ehci, hostpc_reg);
 				ehci_writel(ehci, t3 | HOSTPC_PHCD, hostpc_reg);
 				t3 = ehci_readl(ehci, hostpc_reg);
@@ -904,17 +906,18 @@ static int ehci_hub_control (
 			if ((temp & PORT_PE) == 0
 					|| (temp & PORT_RESET) != 0)
 				goto error;
-			ehci_writel(ehci, temp | PORT_SUSPEND, status_reg);
+
 			/* After above check the port must be connected.
 			 * Set appropriate bit thus could put phy into low power
 			 * mode if we have hostpc feature
 			 */
+			temp &= ~PORT_WKCONN_E;
+			temp |= PORT_WKDISC_E | PORT_WKOC_E;
+			ehci_writel(ehci, temp | PORT_SUSPEND, status_reg);
 			if (hostpc_reg) {
-				temp &= ~PORT_WKCONN_E;
-				temp |= (PORT_WKDISC_E | PORT_WKOC_E);
-				ehci_writel(ehci, temp | PORT_SUSPEND,
-							status_reg);
+				spin_unlock_irqrestore(&ehci->lock, flags);
 				msleep(5);/* 5ms for HCD enter low pwr mode */
+				spin_lock_irqsave(&ehci->lock, flags);
 				temp1 = ehci_readl(ehci, hostpc_reg);
 				ehci_writel(ehci, temp1 | HOSTPC_PHCD,
 					hostpc_reg);

From bbcb8bbad52b8795912e8f02c2b319092b96078e Mon Sep 17 00:00:00 2001
From: Tanaka Akira <akr@fsij.org>
Date: Thu, 21 Jan 2010 02:31:09 +0900
Subject: [PATCH 492/640] USB: SIS USB2VGA DRIVER: support KAIREN's USB VGA
 adaptor USB20SVGA-MB-PLUS

This patch adds the USB product ID of KAIREN's USB VGA Adaptor,
USB20SVGA-MB-PLUS, to sisusbvga work with it.

Signed-off-by: Tanaka Akira <akr@fsij.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/misc/sisusbvga/sisusb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index 0025847743f3..8b37a4b9839e 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3245,6 +3245,7 @@ static struct usb_device_id sisusb_table [] = {
 	{ USB_DEVICE(0x0711, 0x0902) },
 	{ USB_DEVICE(0x0711, 0x0903) },
 	{ USB_DEVICE(0x0711, 0x0918) },
+	{ USB_DEVICE(0x0711, 0x0920) },
 	{ USB_DEVICE(0x182d, 0x021c) },
 	{ USB_DEVICE(0x182d, 0x0269) },
 	{ }

From 1ebca9dad5abe8b2ed4dbd186cd657fb47c1f321 Mon Sep 17 00:00:00 2001
From: Richard Farina <sidhayn@gmail.com>
Date: Wed, 20 Jan 2010 16:42:33 -0500
Subject: [PATCH 493/640] USB: serial: add usbid for dell wwan card to sierra.c

This patch adds support for Dell Computer Corp. Wireless 5720 VZW Mobile
Broadband (EVDO Rev-A) Minicard GPS Port.  I stole the name from lsusb,
but my card does not have a GPS on it (at least not that I can make
function). I'm sure the patch is whitespace damaged but the one line
addition should be fairly straightforward nonetheless.

Tested-by: Rick Farina <sidhayn@gmail.com>
Signed-off-by: Rick Farina <sidhayn@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/sierra.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index ac1b6449fb6a..3eb6143bb646 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -298,6 +298,7 @@ static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x1199, 0x68A3), 	/* Sierra Wireless Direct IP modems */
 	  .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
 	},
+       { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */
 
 	{ }
 };

From 1a02d59aba9b61b820517fb135086471c065b573 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:34 +0300
Subject: [PATCH 494/640] kfifo: Make kfifo_initialized work after kfifo_free

After kfifo rework it's no longer possible to reliably know if kfifo is
usable, since after kfifo_free(), kfifo_initialized() would still return
true. The correct behaviour is needed for at least FHCI USB driver.

This patch fixes the issue by resetting the kfifo to zero values (the
same approach is used in kfifo_alloc() if allocation failed).

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 kernel/kfifo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 498cabba225e..559fb5582b60 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc);
 void kfifo_free(struct kfifo *fifo)
 {
 	kfree(fifo->buffer);
+	_kfifo_init(fifo, NULL, 0);
 }
 EXPORT_SYMBOL(kfifo_free);
 

From 4c743d0ae60462e91465483dd87f4458d71af550 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:36 +0300
Subject: [PATCH 495/640] USB: FHCI: Fix build after kfifo rework

After kfifo rework FHCI fails to build:

  CC      drivers/usb/host/fhci-tds.o
drivers/usb/host/fhci-tds.c: In function 'fhci_ep0_free':
drivers/usb/host/fhci-tds.c:108: error: used struct type value where scalar is required
drivers/usb/host/fhci-tds.c:118: error: used struct type value where scalar is required
drivers/usb/host/fhci-tds.c:128: error: used struct type value where scalar is required

This is because kfifos are no longer pointers in the ep struct.
So, instead of checking the pointers, we should now check if kfifo
is initialized.

Reported-by: Josh Boyer <jwboyer@gmail.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/fhci-tds.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c
index d224ab467a40..e1232890c78b 100644
--- a/drivers/usb/host/fhci-tds.c
+++ b/drivers/usb/host/fhci-tds.c
@@ -105,7 +105,7 @@ void fhci_ep0_free(struct fhci_usb *usb)
 		if (ep->td_base)
 			cpm_muram_free(cpm_muram_offset(ep->td_base));
 
-		if (ep->conf_frame_Q) {
+		if (kfifo_initialized(&ep->conf_frame_Q)) {
 			size = cq_howmany(&ep->conf_frame_Q);
 			for (; size; size--) {
 				struct packet *pkt = cq_get(&ep->conf_frame_Q);
@@ -115,7 +115,7 @@ void fhci_ep0_free(struct fhci_usb *usb)
 			cq_delete(&ep->conf_frame_Q);
 		}
 
-		if (ep->empty_frame_Q) {
+		if (kfifo_initialized(&ep->empty_frame_Q)) {
 			size = cq_howmany(&ep->empty_frame_Q);
 			for (; size; size--) {
 				struct packet *pkt = cq_get(&ep->empty_frame_Q);
@@ -125,7 +125,7 @@ void fhci_ep0_free(struct fhci_usb *usb)
 			cq_delete(&ep->empty_frame_Q);
 		}
 
-		if (ep->dummy_packets_Q) {
+		if (kfifo_initialized(&ep->dummy_packets_Q)) {
 			size = cq_howmany(&ep->dummy_packets_Q);
 			for (; size; size--) {
 				u8 *buff = cq_get(&ep->dummy_packets_Q);

From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:38 +0300
Subject: [PATCH 496/640] kfifo: Don't use integer as NULL pointer

This patch fixes following sparse warnings:

include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer
kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kfifo.h | 2 +-
 kernel/kfifo.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6f6c5f300af6..bc0fc795bd35 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
  */
 static inline bool kfifo_initialized(struct kfifo *fifo)
 {
-	return fifo->buffer != 0;
+	return fifo->buffer != NULL;
 }
 
 /**
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 559fb5582b60..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0);
+		_kfifo_init(fifo, NULL, 0);
 		return -ENOMEM;
 	}
 

From dbe4a99d846e565f0f99914cc82658cd9ce60bdc Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <m.nazarewicz@samsung.com>
Date: Fri, 22 Jan 2010 15:18:21 +0100
Subject: [PATCH 497/640] USB: g_multi: fix CONFIG_USB_G_MULTI_RNDIS usage

g_multi used CONFIG_USB_ETH_RNDIS to check if RNDIS option was requested
where it should check for CONFIG_USB_G_MULTI_RNDIS.  As a result, RNDIS
was never present in g_multi regardless of configuration.

This fixes changes made in commit 396cda90d228d0851f3d64c7c85a1ecf6b8ae1e8.

Signed-off-by: Michal Nazarewicz <m.nazarewicz@samsung.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/multi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/multi.c b/drivers/usb/gadget/multi.c
index 429560100b10..76496f5d272c 100644
--- a/drivers/usb/gadget/multi.c
+++ b/drivers/usb/gadget/multi.c
@@ -29,7 +29,7 @@
 #if defined USB_ETH_RNDIS
 #  undef USB_ETH_RNDIS
 #endif
-#ifdef CONFIG_USB_ETH_RNDIS
+#ifdef CONFIG_USB_G_MULTI_RNDIS
 #  define USB_ETH_RNDIS y
 #endif
 

From dd091c7b3280d4811b855d034fa91519fd3485ef Mon Sep 17 00:00:00 2001
From: Valentin Longchamp <valentin.longchamp@epfl.ch>
Date: Wed, 20 Jan 2010 20:06:31 +0100
Subject: [PATCH 498/640] USB: otg Kconfig: let USB_OTG_UTILS select USB_ULPI
 option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With CONFIG_USB_ULPI=y, CONFIG_USB<=m, CONFIG_PCI=n and
CONFIG_USB_OTG_UTILS=n, which is the default used for mx31moboard,
the build for all mx3 platforms fails because drivers/usb/otg/ulpi.c
where otg_ulpi_create is defined is not compiled.

Build error:
arch/arm/mach-mx3/built-in.o: In function `mxc_board_init':
kzmarm11.c:(.init.text+0x73c): undefined reference to `otg_ulpi_create'
kzmarm11.c:(.init.text+0x1020): undefined reference to `otg_ulpi_create'

This isn't a strong dependency as drivers/usb/otg/ulpi.c doesn't
use functions defined in drivers/usb/otg/otg.o and is only needed
to get ulpi.o linked into the kernel image.

Signed-off-by: Valentin Longchamp <valentin.longchamp@epfl.ch>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index de56b3d743d7..3d2d3e549bd1 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -44,6 +44,7 @@ config ISP1301_OMAP
 config USB_ULPI
 	bool "Generic ULPI Transceiver Driver"
 	depends on ARM
+	select USB_OTG_UTILS
 	help
 	  Enable this to support ULPI connected USB OTG transceivers which
 	  are likely found on embedded boards.

From 31e5d4abceaa3d11ff583ddf76ec292e90eacb7d Mon Sep 17 00:00:00 2001
From: Brian Niebuhr <bniebuhr3@gmail.com>
Date: Mon, 25 Jan 2010 14:45:40 -0600
Subject: [PATCH 499/640] USB: gadget: fix EEM gadget CRC usage

eem_wrap() is sending a sentinel CRC, but it didn't indicate that to
the host, it should zero bit 14 (bmCRC) in the EEM packet header,
instead of setting it.

Also remove a redundant crc calculation in eem_unwrap().


Signed-off-by: Steve Longerbeam <stevel@netspectrum.com>
Acked-by: Brian Niebuhr <bniebuhr@efjohnson.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/f_eem.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
index 0a577d5694fd..d4f0db58a8ad 100644
--- a/drivers/usb/gadget/f_eem.c
+++ b/drivers/usb/gadget/f_eem.c
@@ -358,7 +358,7 @@ done:
 	 * b15:		bmType (0 == data)
 	 */
 	len = skb->len;
-	put_unaligned_le16((len & 0x3FFF) | BIT(14), skb_push(skb, 2));
+	put_unaligned_le16(len & 0x3FFF, skb_push(skb, 2));
 
 	/* add a zero-length EEM packet, if needed */
 	if (padlen)
@@ -464,7 +464,6 @@ static int eem_unwrap(struct gether *port,
 			}
 
 			/* validate CRC */
-			crc = get_unaligned_le32(skb->data + len - ETH_FCS_LEN);
 			if (header & BIT(14)) {
 				crc = get_unaligned_le32(skb->data + len
 							- ETH_FCS_LEN);

From 7c0ff870d1ed287504a61ed865f3d728c757436b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 3 Feb 2010 23:13:24 -0800
Subject: [PATCH 500/640] sysfs: sysfs_sd_setattr set iattrs unconditionally

There is currently a bug in sysfs_sd_setattr inherited from
sysfs_setattr in 2.6.32 where the first time we set the attributes
on a sysfs file we allocate backing store but do not set the
backing store attributes.  Resulting in overly restrictive
permissions on sysfs files.

The fix is to simply modify the code so that it always executes
when we update the sysfs attributes, as we did in 2.6.31 and earlier.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Tested-by: Jean Delvare <khali@linux-fr.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/inode.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 220b758523ae..6a06a1d1ea7b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -81,24 +81,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
 		if (!sd_attrs)
 			return -ENOMEM;
 		sd->s_iattr = sd_attrs;
-	} else {
-		/* attributes were changed at least once in past */
-		iattrs = &sd_attrs->ia_iattr;
+	}
+	/* attributes were changed at least once in past */
+	iattrs = &sd_attrs->ia_iattr;
 
-		if (ia_valid & ATTR_UID)
-			iattrs->ia_uid = iattr->ia_uid;
-		if (ia_valid & ATTR_GID)
-			iattrs->ia_gid = iattr->ia_gid;
-		if (ia_valid & ATTR_ATIME)
-			iattrs->ia_atime = iattr->ia_atime;
-		if (ia_valid & ATTR_MTIME)
-			iattrs->ia_mtime = iattr->ia_mtime;
-		if (ia_valid & ATTR_CTIME)
-			iattrs->ia_ctime = iattr->ia_ctime;
-		if (ia_valid & ATTR_MODE) {
-			umode_t mode = iattr->ia_mode;
-			iattrs->ia_mode = sd->s_mode = mode;
-		}
+	if (ia_valid & ATTR_UID)
+		iattrs->ia_uid = iattr->ia_uid;
+	if (ia_valid & ATTR_GID)
+		iattrs->ia_gid = iattr->ia_gid;
+	if (ia_valid & ATTR_ATIME)
+		iattrs->ia_atime = iattr->ia_atime;
+	if (ia_valid & ATTR_MTIME)
+		iattrs->ia_mtime = iattr->ia_mtime;
+	if (ia_valid & ATTR_CTIME)
+		iattrs->ia_ctime = iattr->ia_ctime;
+	if (ia_valid & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+		iattrs->ia_mode = sd->s_mode = mode;
 	}
 	return 0;
 }

From 18d19c96457d172d913510c083bc7411ed40cb10 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 10 Feb 2010 13:32:49 +0100
Subject: [PATCH 501/640] class: Free the class private data in class_release

Fix a memory leak by freeing the memory allocated in __class_register
for the class private data.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 161746deab4b..6e2c3b064f53 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -59,6 +59,8 @@ static void class_release(struct kobject *kobj)
 	else
 		pr_debug("class '%s' does not have a release() function, "
 			 "be careful\n", class->name);
+
+	kfree(cp);
 }
 
 static struct sysfs_ops class_sysfs_ops = {

From bca476139d2ded86be146dae09b06e22548b67f3 Mon Sep 17 00:00:00 2001
From: Dick Hollenbeck <dick@softplc.com>
Date: Wed, 9 Dec 2009 12:31:34 -0800
Subject: [PATCH 502/640] serial: 8250: add serial transmitter fully empty test

When controlling an industrial radio modem it can be necessary to
manipulate the handshake lines in order to control the radio modem's
transmitter, from userspace.

The transmitter should not be turned off before all characters have been
transmitted.  serial8250_tx_empty() was reporting that all characters were
transmitted before they actually were.

===

Discovered in parallel with more testing and analysis by Kees Schoenmakers
as follows:

I ran into an NetMos 9835 serial pci board which behaves a little
different than the standard.  This type of expansion board is very common.

"Standard" 8250 compatible devices clear the 'UART_LST_TEMT" bit together
with the "UART_LSR_THRE" bit when writing data to the device.

The NetMos device does it slightly different

I believe that the TEMT bit is coupled to the shift register.  The problem
is that after writing data to the device and very quickly after that one
does call serial8250_tx_empty, it returns the wrong information.

My patch makes the test more robust (and solves the problem) and it does
not affect the already correct devices.

Alan:

  We may yet need to quirk this but now we know which chips we have a
  way to do that should we find this breaks some other 8250 clone with
  dodgy THRE.

Signed-off-by: Dick Hollenbeck <dick@softplc.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: Kees Schoenmakers <k.schoenmakers@sigmae.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/8250.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index c3e37c8e7e26..e9b15c3746fa 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -83,6 +83,9 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */
 
 #define PASS_LIMIT	256
 
+#define BOTH_EMPTY 	(UART_LSR_TEMT | UART_LSR_THRE)
+
+
 /*
  * We default to IRQ0 for the "no irq" hack.   Some
  * machine types want others as well - they're free
@@ -1792,7 +1795,7 @@ static unsigned int serial8250_tx_empty(struct uart_port *port)
 	up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
 	spin_unlock_irqrestore(&up->port.lock, flags);
 
-	return lsr & UART_LSR_TEMT ? TIOCSER_TEMT : 0;
+	return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0;
 }
 
 static unsigned int serial8250_get_mctrl(struct uart_port *port)
@@ -1850,8 +1853,6 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
 	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
 /*
  *	Wait for transmitter & holding register to empty
  */

From fee099b278894a1c7383a08cb3c62a5b62a134e8 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 12 Jan 2010 22:48:00 -0600
Subject: [PATCH 503/640] [SCSI] iscsi_tcp regression: remove bogus warn on in
 write path

An empty r2tqueue is a valid state. It just means that we have
processed all that there was to do. This patch removes the WARN_ON
that was added when the kfifo changes were merged.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libiscsi_tcp.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index db6856c138fc..4ad87fd74ddd 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -992,12 +992,10 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		if (r2t == NULL) {
 			if (kfifo_out(&tcp_task->r2tqueue,
 			    (void *)&tcp_task->r2t, sizeof(void *)) !=
-			    sizeof(void *)) {
-				WARN_ONCE(1, "unexpected fifo state");
+			    sizeof(void *))
 				r2t = NULL;
-			}
-
-			r2t = tcp_task->r2t;
+			else
+				r2t = tcp_task->r2t;
 		}
 		spin_unlock_bh(&session->lock);
 	}

From 10897ae71dd6e205969726e0f817f3327ef32f83 Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Thu, 21 Jan 2010 10:15:44 -0800
Subject: [PATCH 504/640] [SCSI] libfc: call ddp setup for only FCP reads to
 avoid accessing junk fsp pointer

Adds check to call fc_fcp_ddp_setup for only FCP read cmds to avoid
accessing junk fsp pointer at least in ESX since non FCP frame had
junk fsp value, though fsp is implicitly initialized to null
by __alloc_skb but with this patch no more relying on fsp
initialized to null value and hitting junk fsp ptr access.

Removes fsp pointer checking in fc_fcp_ddp_setup as this is not
needed any more since its only caller for FCP read will always
have a valid fsp.

Reported by: Frank Zhang <frank_1.zhang@intel.com>
Reported by: Rob Love <robert.w.love@intel.com>
Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_exch.c | 2 +-
 drivers/scsi/libfc/fc_fcp.c  | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 19d711cb938c..7f4364770e4a 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -1890,7 +1890,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
 	fc_exch_setup_hdr(ep, fp, ep->f_ctl);
 	sp->cnt++;
 
-	if (ep->xid <= lport->lro_xid)
+	if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD)
 		fc_fcp_ddp_setup(fr_fsp(fp), ep->xid);
 
 	if (unlikely(lport->tt.frame_send(lport, fp)))
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 881d5dfe8c74..6fde2fabfd9b 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -298,9 +298,6 @@ void fc_fcp_ddp_setup(struct fc_fcp_pkt *fsp, u16 xid)
 {
 	struct fc_lport *lport;
 
-	if (!fsp)
-		return;
-
 	lport = fsp->lp;
 	if ((fsp->req_flags & FC_SRB_READ) &&
 	    (lport->lro_enabled) && (lport->tt.ddp_setup)) {

From 3b709150b73205710d05128b925090aac048ed23 Mon Sep 17 00:00:00 2001
From: Hugh Daschbach <hdasch@broadcom.com>
Date: Thu, 21 Jan 2010 10:15:49 -0800
Subject: [PATCH 505/640] [SCSI] libfc: Fix e_d_tov ns -> ms scaling factor in
 PLOGI response.

Both PLOGI and RTV response processing conditionally scale e_d_tov,
but use different scaling factors.  The scaling factor is correct in
RTV response processing.  Bring PLOGI e_d_tov scaling in line with RTV
common service parameter inspection.

Signed-off-by: Hugh Daschbach <hdasch@broadcom.com>
Acked-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 02300523b234..97923bb07765 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -623,7 +623,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 		tov = ntohl(plp->fl_csp.sp_e_d_tov);
 		if (ntohs(plp->fl_csp.sp_features) & FC_SP_FT_EDTR)
-			tov /= 1000;
+			tov /= 1000000;
 		if (tov > rdata->e_d_tov)
 			rdata->e_d_tov = tov;
 		csp_seq = ntohs(plp->fl_csp.sp_tot_seq);

From b248df30fca3aeee1d650b570e8cbc4e8cc45710 Mon Sep 17 00:00:00 2001
From: Hugh Daschbach <hdasch@broadcom.com>
Date: Thu, 21 Jan 2010 10:15:55 -0800
Subject: [PATCH 506/640] [SCSI] libfc: Don't assume response request present.

Fix NULL pointer dereference crash occurs in fc_lport_bsg_request()
for bsg requests that do not contain a response request.
Specifically, FC_BSG_HST_ADD_RPORT and FC_BSG_HST_DEL_RPORT bsg
requests are not guaranteed to include a response request.

Signed-off-by: Hugh Daschbach <hdasch@broadcom.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/libfc/fc_lport.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 0b165024a219..7ec8ce75007c 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1800,7 +1800,8 @@ int fc_lport_bsg_request(struct fc_bsg_job *job)
 	u32 did;
 
 	job->reply->reply_payload_rcv_len = 0;
-	rsp->resid_len = job->reply_payload.payload_len;
+	if (rsp)
+		rsp->resid_len = job->reply_payload.payload_len;
 
 	mutex_lock(&lport->lp_mutex);
 

From f47dd855d9e64a5d499a93e858a82bc5e7b21345 Mon Sep 17 00:00:00 2001
From: Bhanu Prakash Gollapudi <bprakash@broadcom.com>
Date: Thu, 21 Jan 2010 10:16:00 -0800
Subject: [PATCH 507/640] [SCSI] libfcoe: Send port LKA every FIP_VN_KA_PERIOD
 secs.

libfcoe module doesnt send port keep alive every
FIP_VN_KA_PERIOD due to improper assignment of timeout value.
Update the port_ka_time appropriately by incrementing it by
FIP_VN_KA_PERIOD in fcoe_ctlr_timeout(), so that the link_work
is scheduled to send the port LKA.

Signed-off-by: Bhanu Gollapudi <bprakash@broadcom.com>
Acked-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/libfcoe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index 9823291395ad..511cb6b371ee 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -1187,7 +1187,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
 			next_timer = fip->ctlr_ka_time;
 
 		if (time_after_eq(jiffies, fip->port_ka_time)) {
-			fip->port_ka_time += jiffies +
+			fip->port_ka_time = jiffies +
 				msecs_to_jiffies(FIP_VN_KA_PERIOD);
 			fip->send_port_ka = 1;
 		}

From 6409ea65b3b81ef693cbbc7c4b2300e50a4219dd Mon Sep 17 00:00:00 2001
From: Rob Love <robert.w.love@intel.com>
Date: Thu, 21 Jan 2010 10:16:05 -0800
Subject: [PATCH 508/640] [SCSI] fcoe: Only rmmod fcoe.ko if there are no
 active connections

Currently we're gracefully tearing down each active connection
when fcoe.ko is removed. We shouldn't allow the user to destroy
connections by removing the module. We should force the user to
destroy each connection and then the module can be removed.

This patch makes it so a refrerence count on the module is taken
each time a fcoe_interface is created. The reference count
is dropped when the fcoe_interface is destroyed. This makes it
so that module_exit() doesn't get called unless all fcoe_interfaces
have been destroyed.

This patch leaves the removal of interfaces in the module_exit
routine so that if the user does a 'rmmod -f' we'll clean everything
up before removing the module.

The module_put line was put before the out_putdev goto line because
we should only be decrementing the reference count if a
fcoe_interface is actually destroyed. If we can't find the netdev
or the fcoe_interface then it's assumed that something else has
destroyed the fcoe_interface and it would have decremented the
reference count at that time.

Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/fcoe/fcoe.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 10be9f36a4cc..2f47ae7cce91 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2009,6 +2009,8 @@ static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
 	fcoe_interface_cleanup(fcoe);
 	rtnl_unlock();
 	fcoe_if_destroy(fcoe->ctlr.lp);
+	module_put(THIS_MODULE);
+
 out_putdev:
 	dev_put(netdev);
 out_nodev:
@@ -2059,6 +2061,11 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp)
 	}
 #endif
 
+	if (!try_module_get(THIS_MODULE)) {
+		rc = -EINVAL;
+		goto out_nomod;
+	}
+
 	rtnl_lock();
 	netdev = fcoe_if_to_netdev(buffer);
 	if (!netdev) {
@@ -2099,17 +2106,24 @@ static int fcoe_create(const char *buffer, struct kernel_param *kp)
 	if (!fcoe_link_ok(lport))
 		fcoe_ctlr_link_up(&fcoe->ctlr);
 
-	rc = 0;
-out_free:
 	/*
 	 * Release from init in fcoe_interface_create(), on success lport
 	 * should be holding a reference taken in fcoe_if_create().
 	 */
 	fcoe_interface_put(fcoe);
+	dev_put(netdev);
+	rtnl_unlock();
+	mutex_unlock(&fcoe_config_mutex);
+
+	return 0;
+out_free:
+	fcoe_interface_put(fcoe);
 out_putdev:
 	dev_put(netdev);
 out_nodev:
 	rtnl_unlock();
+	module_put(THIS_MODULE);
+out_nomod:
 	mutex_unlock(&fcoe_config_mutex);
 	return rc;
 }

From b72c7d543589736d43da531566490dd31572f5ca Mon Sep 17 00:00:00 2001
From: Ranjith Lohithakshan <ranjithl@ti.com>
Date: Wed, 17 Feb 2010 17:17:01 +0000
Subject: [PATCH 509/640] omap: Remove DEBUG_FS dependency for mux name
 checking

The check for a valid mux name should be performed regardless of whether
DEBUG_FS is enabled or not. Otherwise without DEBUG_FS, we get:

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1]
last sysfs file:
Modules linked in:
CPU: 0    Not tainted  (2.6.33-rc8 #10)
PC is at strcmp+0x18/0x40
LR is at omap_mux_init_signal+0x68/0x14c
...

This fixes the issue currently seen with boards not booting up
if DEBUG_FS is not enabled in defconfig.

Note that the earlier ifndef + ifdef now becomes simpler ifdef else:
If CONFIG_OMAP_MUX is selected, we use pin names. If it's not selected,
we only want the GPIO to mux register mapping.

Signed-off-by: Ranjith Lohithakshan <ranjithl@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/mux.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 5fedc50c58e4..5fef73f4743d 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -961,16 +961,14 @@ static void __init omap_mux_init_list(struct omap_mux *superset)
 	while (superset->reg_offset !=  OMAP_MUX_TERMINATOR) {
 		struct omap_mux *entry;
 
-#ifndef CONFIG_OMAP_MUX
-		/* Skip pins that are not muxed as GPIO by bootloader */
-		if (!OMAP_MODE_GPIO(omap_mux_read(superset->reg_offset))) {
+#ifdef CONFIG_OMAP_MUX
+		if (!superset->muxnames || !superset->muxnames[0]) {
 			superset++;
 			continue;
 		}
-#endif
-
-#if defined(CONFIG_OMAP_MUX) && defined(CONFIG_DEBUG_FS)
-		if (!superset->muxnames || !superset->muxnames[0]) {
+#else
+		/* Skip pins that are not muxed as GPIO by bootloader */
+		if (!OMAP_MODE_GPIO(omap_mux_read(superset->reg_offset))) {
 			superset++;
 			continue;
 		}

From 172d2d0041fdd4f3617dbdff8296bf279db3d5fb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 17 Feb 2010 16:42:08 -0800
Subject: [PATCH 510/640] sparc64: Sync of_create_pci_dev() with
 drivers/pci/probe.c changes.

Mirrors powerpc commits bb209c8287d2d55ec4a67e3933346e0a3ee0da76
("powerpc/pci: Add calls to set_pcie_port_type() and set_pcie_hotplug_bridge()")
and 26b4a0ca46985ae9586c194f7859f3838b1230f8
("powerpc/pci: Add missing hookup to pci_slot")

We also need to initialize ->dma_mask explicitly here too.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/pci.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 539e83f8e087..592b03d85167 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -247,6 +247,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 					 struct pci_bus *bus, int devfn)
 {
 	struct dev_archdata *sd;
+	struct pci_slot *slot;
 	struct of_device *op;
 	struct pci_dev *dev;
 	const char *type;
@@ -286,6 +287,11 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	dev->dev.bus = &pci_bus_type;
 	dev->devfn = devfn;
 	dev->multifunction = 0;		/* maybe a lie? */
+	set_pcie_port_type(dev);
+
+	list_for_each_entry(slot, &dev->bus->slots, list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = slot;
 
 	dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
 	dev->device = of_getintprop_default(node, "device-id", 0xffff);
@@ -322,6 +328,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 
 	dev->current_state = 4;		/* unknown power state */
 	dev->error_state = pci_channel_io_normal;
+	dev->dma_mask = 0xffffffff;
 
 	if (!strcmp(node->name, "pci")) {
 		/* a PCI-PCI bridge */

From d7ecfb3c2aa155c9f6152ebe91de92067d16ba6e Mon Sep 17 00:00:00 2001
From: Kristoffer Glembo <kristoffer@gaisler.com>
Date: Mon, 15 Feb 2010 16:10:28 +0100
Subject: [PATCH 511/640] sparc: Fix incorrect comparison in
 of_bus_ambapp_match()

Use type instead of name in comparison.

Signed-off-by: Kristoffer Glembo <kristoffer@gaisler.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/of_device_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
index 4c26eb59e742..53a58b349849 100644
--- a/arch/sparc/kernel/of_device_32.c
+++ b/arch/sparc/kernel/of_device_32.c
@@ -105,7 +105,7 @@ static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags)
 
 static int of_bus_ambapp_match(struct device_node *np)
 {
-	return !strcmp(np->name, "ambapp");
+	return !strcmp(np->type, "ambapp");
 }
 
 static void of_bus_ambapp_count_cells(struct device_node *child,

From 01d4503968f471f876fb44335800d2cf8dc5a2ce Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sun, 31 Jan 2010 07:07:14 +1000
Subject: [PATCH 512/640] drm/radeon/kms: use udelay for short delays

For usec delays use udelay instead of scheduling, this should
allow reclocking to happen faster. This also was the cause
of reported 33s delays at bootup on certain systems.

fixes: freedesktop.org bug 25506

Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/atom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 2a3df5599ab4..7f152f66f196 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -643,7 +643,7 @@ static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg)
 	uint8_t count = U8((*ptr)++);
 	SDEBUG("   count: %d\n", count);
 	if (arg == ATOM_UNIT_MICROSEC)
-		schedule_timeout_uninterruptible(usecs_to_jiffies(count));
+		udelay(count);
 	else
 		schedule_timeout_uninterruptible(msecs_to_jiffies(count));
 }

From f2d12b8e2c05e86b1a2070efcc07f1b8a79afb4c Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 15 Feb 2010 14:45:22 +0000
Subject: [PATCH 513/640] drm/vmwgfx: Use fb handover mechanism instead of
 stealth mode.

When the vmwgfx module is loaded on top of vesafb, it would operate in
stealth mode in parallel with vesafb, evicting VRAM on dropmaster.

Change that to use the vesafb handover mechanism, like other drmfb drivers.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 49 ++++++++++-------------------
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  |  3 ++
 2 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a6e8f687fa64..0c9c0811f42d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -348,22 +348,19 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		 */
 
 		DRM_INFO("It appears like vesafb is loaded. "
-			 "Ignore above error if any. Entering stealth mode.\n");
+			 "Ignore above error if any.\n");
 		ret = pci_request_region(dev->pdev, 2, "vmwgfx stealth probe");
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Failed reserving the SVGA MMIO resource.\n");
 			goto out_no_device;
 		}
-		vmw_kms_init(dev_priv);
-		vmw_overlay_init(dev_priv);
-	} else {
-		ret = vmw_request_device(dev_priv);
-		if (unlikely(ret != 0))
-			goto out_no_device;
-		vmw_kms_init(dev_priv);
-		vmw_overlay_init(dev_priv);
-		vmw_fb_init(dev_priv);
 	}
+	ret = vmw_request_device(dev_priv);
+	if (unlikely(ret != 0))
+		goto out_no_device;
+	vmw_kms_init(dev_priv);
+	vmw_overlay_init(dev_priv);
+	vmw_fb_init(dev_priv);
 
 	dev_priv->pm_nb.notifier_call = vmwgfx_pm_notifier;
 	register_pm_notifier(&dev_priv->pm_nb);
@@ -406,17 +403,15 @@ static int vmw_driver_unload(struct drm_device *dev)
 
 	unregister_pm_notifier(&dev_priv->pm_nb);
 
-	if (!dev_priv->stealth) {
-		vmw_fb_close(dev_priv);
-		vmw_kms_close(dev_priv);
-		vmw_overlay_close(dev_priv);
-		vmw_release_device(dev_priv);
-		pci_release_regions(dev->pdev);
-	} else {
-		vmw_kms_close(dev_priv);
-		vmw_overlay_close(dev_priv);
+	vmw_fb_close(dev_priv);
+	vmw_kms_close(dev_priv);
+	vmw_overlay_close(dev_priv);
+	vmw_release_device(dev_priv);
+	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
-	}
+	else
+		pci_release_regions(dev->pdev);
+
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
 		drm_irq_uninstall(dev_priv->dev);
 	if (dev->devname == vmw_devname)
@@ -585,11 +580,6 @@ static int vmw_master_set(struct drm_device *dev,
 	int ret = 0;
 
 	DRM_INFO("Master set.\n");
-	if (dev_priv->stealth) {
-		ret = vmw_request_device(dev_priv);
-		if (unlikely(ret != 0))
-			return ret;
-	}
 
 	if (active) {
 		BUG_ON(active != &dev_priv->fbdev_master);
@@ -649,18 +639,11 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	ttm_lock_set_kill(&vmaster->lock, true, SIGTERM);
 
-	if (dev_priv->stealth) {
-		ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM);
-		if (unlikely(ret != 0))
-			DRM_ERROR("Unable to clean VRAM on master drop.\n");
-		vmw_release_device(dev_priv);
-	}
 	dev_priv->active_master = &dev_priv->fbdev_master;
 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
 	ttm_vt_unlock(&dev_priv->fbdev_master.lock);
 
-	if (!dev_priv->stealth)
-		vmw_fb_on(dev_priv);
+	vmw_fb_on(dev_priv);
 }
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 4f4f6432be8b..a93367041cdc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -559,6 +559,9 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	info->pixmap.scan_align = 1;
 #endif
 
+	info->aperture_base = vmw_priv->vram_start;
+	info->aperture_size = vmw_priv->vram_size;
+
 	/*
 	 * Dirty & Deferred IO
 	 */

From b58db2c6dd18d35f59862d3352c86a0a58838bf3 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Mon, 15 Feb 2010 22:15:39 +0000
Subject: [PATCH 514/640] drm/edid: Fix interlaced detailed timings to be frame
 size, not field.

cf. https://bugzilla.redhat.com/show_bug.cgi?id=533561

Signed-off-by: Adam Jackson <ajax@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_edid.c | 47 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index f665b05592f3..ab6c97330412 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -598,6 +598,50 @@ struct drm_display_mode *drm_mode_std(struct drm_device *dev,
 	return mode;
 }
 
+/*
+ * EDID is delightfully ambiguous about how interlaced modes are to be
+ * encoded.  Our internal representation is of frame height, but some
+ * HDTV detailed timings are encoded as field height.
+ *
+ * The format list here is from CEA, in frame size.  Technically we
+ * should be checking refresh rate too.  Whatever.
+ */
+static void
+drm_mode_do_interlace_quirk(struct drm_display_mode *mode,
+			    struct detailed_pixel_timing *pt)
+{
+	int i;
+	static const struct {
+		int w, h;
+	} cea_interlaced[] = {
+		{ 1920, 1080 },
+		{  720,  480 },
+		{ 1440,  480 },
+		{ 2880,  480 },
+		{  720,  576 },
+		{ 1440,  576 },
+		{ 2880,  576 },
+	};
+	static const int n_sizes =
+		sizeof(cea_interlaced)/sizeof(cea_interlaced[0]);
+
+	if (!(pt->misc & DRM_EDID_PT_INTERLACED))
+		return;
+
+	for (i = 0; i < n_sizes; i++) {
+		if ((mode->hdisplay == cea_interlaced[i].w) &&
+		    (mode->vdisplay == cea_interlaced[i].h / 2)) {
+			mode->vdisplay *= 2;
+			mode->vsync_start *= 2;
+			mode->vsync_end *= 2;
+			mode->vtotal *= 2;
+			mode->vtotal |= 1;
+		}
+	}
+
+	mode->flags |= DRM_MODE_FLAG_INTERLACE;
+}
+
 /**
  * drm_mode_detailed - create a new mode from an EDID detailed timing section
  * @dev: DRM device (needed to create new mode)
@@ -680,8 +724,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
 
 	drm_mode_set_name(mode);
 
-	if (pt->misc & DRM_EDID_PT_INTERLACED)
-		mode->flags |= DRM_MODE_FLAG_INTERLACE;
+	drm_mode_do_interlace_quirk(mode, pt);
 
 	if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
 		pt->misc |= DRM_EDID_PT_HSYNC_POSITIVE | DRM_EDID_PT_VSYNC_POSITIVE;

From 91cb91becf372b5308cdd7d2e15b2e3ef66bae7e Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Mon, 15 Feb 2010 21:36:13 +0100
Subject: [PATCH 515/640] drm/radeon/kms: fix indirect buffer management V2

There is 3 different distinct states for an indirect buffer (IB) :
  1- free with no fence
  2- free with a fence
  3- non free (fence doesn't matter)
Previous code mixed case 2 & 3 in a single one leading to possible
catastrophique failure. This patch rework the handling and properly
separate each case. So when you get ib we set the ib as non free and
fence status doesn't matter. Fence become active (ie has a meaning
for the ib code) once the ib is scheduled or free. This patch also
get rid of the alloc bitmap as it was overkill, we know go through
IB pool list like in a ring buffer as the oldest IB is the first
one the will be free.

Fix :
https://bugs.freedesktop.org/show_bug.cgi?id=26438
and likely other bugs.

V2 remove the scheduled list, it's useless now, fix free ib scanning

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600_blit_kms.c |   3 -
 drivers/gpu/drm/radeon/radeon.h        |   9 ++-
 drivers/gpu/drm/radeon/radeon_ring.c   | 105 ++++++++++---------------
 3 files changed, 45 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index af1c3ca8a4cb..446b765ac72a 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -543,9 +543,6 @@ int r600_vb_ib_get(struct radeon_device *rdev)
 void r600_vb_ib_put(struct radeon_device *rdev)
 {
 	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
-	mutex_lock(&rdev->ib_pool.mutex);
-	list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs);
-	mutex_unlock(&rdev->ib_pool.mutex);
 	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f57480ba1355..c0356bb193e5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -96,6 +96,7 @@ extern int radeon_audio;
  * symbol;
  */
 #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */
+/* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE		16
 #define RADEON_DEBUGFS_MAX_NUM_FILES	32
 #define RADEONFB_CONN_LIMIT		4
@@ -363,11 +364,12 @@ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev);
  */
 struct radeon_ib {
 	struct list_head	list;
-	unsigned long		idx;
+	unsigned		idx;
 	uint64_t		gpu_addr;
 	struct radeon_fence	*fence;
-	uint32_t	*ptr;
+	uint32_t		*ptr;
 	uint32_t		length_dw;
+	bool			free;
 };
 
 /*
@@ -377,10 +379,9 @@ struct radeon_ib {
 struct radeon_ib_pool {
 	struct mutex		mutex;
 	struct radeon_bo	*robj;
-	struct list_head	scheduled_ibs;
 	struct radeon_ib	ibs[RADEON_IB_POOL_SIZE];
 	bool			ready;
-	DECLARE_BITMAP(alloc_bm, RADEON_IB_POOL_SIZE);
+	unsigned		head_id;
 };
 
 struct radeon_cp {
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 4d12b2d17b4d..694799f6fac1 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -41,68 +41,55 @@ int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib)
 {
 	struct radeon_fence *fence;
 	struct radeon_ib *nib;
-	unsigned long i;
-	int r = 0;
+	int r = 0, i, c;
 
 	*ib = NULL;
 	r = radeon_fence_create(rdev, &fence);
 	if (r) {
-		DRM_ERROR("failed to create fence for new IB\n");
+		dev_err(rdev->dev, "failed to create fence for new IB\n");
 		return r;
 	}
 	mutex_lock(&rdev->ib_pool.mutex);
-	i = find_first_zero_bit(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
-	if (i < RADEON_IB_POOL_SIZE) {
-		set_bit(i, rdev->ib_pool.alloc_bm);
-		rdev->ib_pool.ibs[i].length_dw = 0;
-		*ib = &rdev->ib_pool.ibs[i];
-		mutex_unlock(&rdev->ib_pool.mutex);
-		goto out;
+	for (i = rdev->ib_pool.head_id, c = 0, nib = NULL; c < RADEON_IB_POOL_SIZE; c++, i++) {
+		i &= (RADEON_IB_POOL_SIZE - 1);
+		if (rdev->ib_pool.ibs[i].free) {
+			nib = &rdev->ib_pool.ibs[i];
+			break;
+		}
 	}
-	if (list_empty(&rdev->ib_pool.scheduled_ibs)) {
-		/* we go do nothings here */
+	if (nib == NULL) {
+		/* This should never happen, it means we allocated all
+		 * IB and haven't scheduled one yet, return EBUSY to
+		 * userspace hoping that on ioctl recall we get better
+		 * luck
+		 */
+		dev_err(rdev->dev, "no free indirect buffer !\n");
 		mutex_unlock(&rdev->ib_pool.mutex);
-		DRM_ERROR("all IB allocated none scheduled.\n");
-		r = -EINVAL;
-		goto out;
+		radeon_fence_unref(&fence);
+		return -EBUSY;
 	}
-	/* get the first ib on the scheduled list */
-	nib = list_entry(rdev->ib_pool.scheduled_ibs.next,
-			 struct radeon_ib, list);
-	if (nib->fence == NULL) {
-		/* we go do nothings here */
+	rdev->ib_pool.head_id = (nib->idx + 1) & (RADEON_IB_POOL_SIZE - 1);
+	nib->free = false;
+	if (nib->fence) {
 		mutex_unlock(&rdev->ib_pool.mutex);
-		DRM_ERROR("IB %lu scheduled without a fence.\n", nib->idx);
-		r = -EINVAL;
-		goto out;
-	}
-	mutex_unlock(&rdev->ib_pool.mutex);
-
-	r = radeon_fence_wait(nib->fence, false);
-	if (r) {
-		DRM_ERROR("radeon: IB(%lu:0x%016lX:%u)\n", nib->idx,
-			  (unsigned long)nib->gpu_addr, nib->length_dw);
-		DRM_ERROR("radeon: GPU lockup detected, fail to get a IB\n");
-		goto out;
+		r = radeon_fence_wait(nib->fence, false);
+		if (r) {
+			dev_err(rdev->dev, "error waiting fence of IB(%u:0x%016lX:%u)\n",
+				nib->idx, (unsigned long)nib->gpu_addr, nib->length_dw);
+			mutex_lock(&rdev->ib_pool.mutex);
+			nib->free = true;
+			mutex_unlock(&rdev->ib_pool.mutex);
+			radeon_fence_unref(&fence);
+			return r;
+		}
+		mutex_lock(&rdev->ib_pool.mutex);
 	}
 	radeon_fence_unref(&nib->fence);
-
+	nib->fence = fence;
 	nib->length_dw = 0;
-
-	/* scheduled list is accessed here */
-	mutex_lock(&rdev->ib_pool.mutex);
-	list_del(&nib->list);
-	INIT_LIST_HEAD(&nib->list);
 	mutex_unlock(&rdev->ib_pool.mutex);
-
 	*ib = nib;
-out:
-	if (r) {
-		radeon_fence_unref(&fence);
-	} else {
-		(*ib)->fence = fence;
-	}
-	return r;
+	return 0;
 }
 
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
@@ -114,18 +101,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 		return;
 	}
 	mutex_lock(&rdev->ib_pool.mutex);
-	if (!list_empty(&tmp->list) && !radeon_fence_signaled(tmp->fence)) {
-		/* IB is scheduled & not signaled don't do anythings */
-		mutex_unlock(&rdev->ib_pool.mutex);
-		return;
-	}
-	list_del(&tmp->list);
-	INIT_LIST_HEAD(&tmp->list);
-	if (tmp->fence)
-		radeon_fence_unref(&tmp->fence);
-
-	tmp->length_dw = 0;
-	clear_bit(tmp->idx, rdev->ib_pool.alloc_bm);
+	tmp->free = true;
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
@@ -135,7 +111,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 	if (!ib->length_dw || !rdev->cp.ready) {
 		/* TODO: Nothings in the ib we should report. */
-		DRM_ERROR("radeon: couldn't schedule IB(%lu).\n", ib->idx);
+		DRM_ERROR("radeon: couldn't schedule IB(%u).\n", ib->idx);
 		return -EINVAL;
 	}
 
@@ -148,7 +124,8 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 	radeon_ring_ib_execute(rdev, ib);
 	radeon_fence_emit(rdev, ib->fence);
 	mutex_lock(&rdev->ib_pool.mutex);
-	list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs);
+	/* once scheduled IB is considered free and protected by the fence */
+	ib->free = true;
 	mutex_unlock(&rdev->ib_pool.mutex);
 	radeon_ring_unlock_commit(rdev);
 	return 0;
@@ -164,7 +141,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 	if (rdev->ib_pool.robj)
 		return 0;
 	/* Allocate 1M object buffer */
-	INIT_LIST_HEAD(&rdev->ib_pool.scheduled_ibs);
 	r = radeon_bo_create(rdev, NULL,  RADEON_IB_POOL_SIZE*64*1024,
 				true, RADEON_GEM_DOMAIN_GTT,
 				&rdev->ib_pool.robj);
@@ -195,9 +171,9 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 		rdev->ib_pool.ibs[i].ptr = ptr + offset;
 		rdev->ib_pool.ibs[i].idx = i;
 		rdev->ib_pool.ibs[i].length_dw = 0;
-		INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].list);
+		rdev->ib_pool.ibs[i].free = true;
 	}
-	bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
+	rdev->ib_pool.head_id = 0;
 	rdev->ib_pool.ready = true;
 	DRM_INFO("radeon: ib pool ready.\n");
 	if (radeon_debugfs_ib_init(rdev)) {
@@ -214,7 +190,6 @@ void radeon_ib_pool_fini(struct radeon_device *rdev)
 		return;
 	}
 	mutex_lock(&rdev->ib_pool.mutex);
-	bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
 	if (rdev->ib_pool.robj) {
 		r = radeon_bo_reserve(rdev->ib_pool.robj, false);
 		if (likely(r == 0)) {
@@ -363,7 +338,7 @@ static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
 	if (ib == NULL) {
 		return 0;
 	}
-	seq_printf(m, "IB %04lu\n", ib->idx);
+	seq_printf(m, "IB %04u\n", ib->idx);
 	seq_printf(m, "IB fence %p\n", ib->fence);
 	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
 	for (i = 0; i < ib->length_dw; i++) {

From 94429bb6c8343722544e282d89dc4638672e49b4 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Mon, 15 Feb 2010 21:36:33 +0100
Subject: [PATCH 516/640] drm/radeon/kms: fix bo's fence association

Previous code did associate fence to bo before the fence was emited
and it also didn't lock protected access to ttm sync_obj member.
Both of this flaw leads to possible race between different code
path. This patch fix this by associating fence only once the fence
is emitted and properly lock protect access to sync_obj member of
ttm.

Fix:
https://bugs.freedesktop.org/show_bug.cgi?id=26438
and likely similar others bugs
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_cs.c     | 10 +++-----
 drivers/gpu/drm/radeon/radeon_object.c | 34 +++++++++++---------------
 drivers/gpu/drm/radeon/radeon_object.h |  4 +--
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 1190148cf5e6..e9d085021c1f 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -86,7 +86,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 						&p->validated);
 		}
 	}
-	return radeon_bo_list_validate(&p->validated, p->ib->fence);
+	return radeon_bo_list_validate(&p->validated);
 }
 
 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
@@ -189,12 +189,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 {
 	unsigned i;
 
-	if (error && parser->ib) {
-		radeon_bo_list_unvalidate(&parser->validated,
-						parser->ib->fence);
-	} else {
-		radeon_bo_list_unreserve(&parser->validated);
+	if (!error && parser->ib) {
+		radeon_bo_list_fence(&parser->validated, parser->ib->fence);
 	}
+	radeon_bo_list_unreserve(&parser->validated);
 	for (i = 0; i < parser->nrelocs; i++) {
 		if (parser->relocs[i].gobj) {
 			mutex_lock(&parser->rdev->ddev->struct_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d72a71bff218..f1da370928eb 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -306,11 +306,10 @@ void radeon_bo_list_unreserve(struct list_head *head)
 	}
 }
 
-int radeon_bo_list_validate(struct list_head *head, void *fence)
+int radeon_bo_list_validate(struct list_head *head)
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
-	struct radeon_fence *old_fence = NULL;
 	int r;
 
 	r = radeon_bo_list_reserve(head);
@@ -334,32 +333,27 @@ int radeon_bo_list_validate(struct list_head *head, void *fence)
 		}
 		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
 		lobj->tiling_flags = bo->tiling_flags;
-		if (fence) {
-			old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
-			bo->tbo.sync_obj = radeon_fence_ref(fence);
-			bo->tbo.sync_obj_arg = NULL;
-		}
-		if (old_fence) {
-			radeon_fence_unref(&old_fence);
-		}
 	}
 	return 0;
 }
 
-void radeon_bo_list_unvalidate(struct list_head *head, void *fence)
+void radeon_bo_list_fence(struct list_head *head, void *fence)
 {
 	struct radeon_bo_list *lobj;
-	struct radeon_fence *old_fence;
+	struct radeon_bo *bo;
+	struct radeon_fence *old_fence = NULL;
 
-	if (fence)
-		list_for_each_entry(lobj, head, list) {
-			old_fence = to_radeon_fence(lobj->bo->tbo.sync_obj);
-			if (old_fence == fence) {
-				lobj->bo->tbo.sync_obj = NULL;
-				radeon_fence_unref(&old_fence);
-			}
+	list_for_each_entry(lobj, head, list) {
+		bo = lobj->bo;
+		spin_lock(&bo->tbo.lock);
+		old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
+		bo->tbo.sync_obj = radeon_fence_ref(fence);
+		bo->tbo.sync_obj_arg = NULL;
+		spin_unlock(&bo->tbo.lock);
+		if (old_fence) {
+			radeon_fence_unref(&old_fence);
 		}
-	radeon_bo_list_unreserve(head);
+	}
 }
 
 int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a02f18011ad1..7ab43de1e244 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -156,8 +156,8 @@ extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head);
 extern int radeon_bo_list_reserve(struct list_head *head);
 extern void radeon_bo_list_unreserve(struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head, void *fence);
-extern void radeon_bo_list_unvalidate(struct list_head *head, void *fence);
+extern int radeon_bo_list_validate(struct list_head *head);
+extern void radeon_bo_list_fence(struct list_head *head, void *fence);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,

From 49bf83a45fc677db1ed44d0e072e6aaeabe4e124 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 16 Feb 2010 03:45:45 -0500
Subject: [PATCH 517/640] ACPI: fix "acpi=ht" boot option

We broke "acpi=ht" in 2.6.32 by disabling MADT parsing
for acpi=disabled.  e5b8fc6ac158f65598f58dba2c0d52ba3b412f52
This also broke systems which invoked acpi=ht via DMI blacklist.

acpi=ht is a really ugly hack,
but restore it for those that still use it.

http://bugzilla.kernel.org/show_bug.cgi?id=14886

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/ia64/include/asm/acpi.h | 1 +
 drivers/acpi/tables.c        | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index 7ae58892ba8d..e97b255d97bc 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -94,6 +94,7 @@ ia64_acpi_release_global_lock (unsigned int *lock)
 #define acpi_noirq 0	/* ACPI always enabled on IA64 */
 #define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */
 #define acpi_strict 1	/* no ACPI spec workarounds on IA64 */
+#define acpi_ht 0	/* no HT-only mode on IA64 */
 #endif
 #define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */
 static inline void disable_acpi(void) { }
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index f336bca7c450..8a0ed2800e63 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -213,7 +213,7 @@ acpi_table_parse_entries(char *id,
 	unsigned long table_end;
 	acpi_size tbl_size;
 
-	if (acpi_disabled)
+	if (acpi_disabled && !acpi_ht)
 		return -ENODEV;
 
 	if (!handler)
@@ -280,7 +280,7 @@ int __init acpi_table_parse(char *id, acpi_table_handler handler)
 	struct acpi_table_header *table = NULL;
 	acpi_size tbl_size;
 
-	if (acpi_disabled)
+	if (acpi_disabled && !acpi_ht)
 		return -ENODEV;
 
 	if (!handler)

From c2d1a2a11b9b29c3be1dd781dc88518ffab8d4be Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 17 Feb 2010 12:17:33 -0800
Subject: [PATCH 518/640] Input: i8042 - fix KBC jam during hibernate

633aae2 "Input: i8042 - switch to using dev_pm_ops" removed handling
for PMSG_THAW, since we do not need to do anything during freeze and
thus it was thougt that thaw is not needed as well. However, there is
a period when interrupts are kept off, and if key happens to be pressed
during that time KBC becomes jammed. To avoid the jam we simply need
to poll KBC once during thaw.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index d84a36e545f6..b54aee7cd9e3 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1161,9 +1161,17 @@ static int i8042_pm_restore(struct device *dev)
 	return 0;
 }
 
+static int i8042_pm_thaw(struct device *dev)
+{
+	i8042_interrupt(0, NULL);
+
+	return 0;
+}
+
 static const struct dev_pm_ops i8042_pm_ops = {
 	.suspend	= i8042_pm_reset,
 	.resume		= i8042_pm_restore,
+	.thaw		= i8042_pm_thaw,
 	.poweroff	= i8042_pm_reset,
 	.restore	= i8042_pm_restore,
 };

From 3b77fd8ee6a8ae34e349651e9d5f5000d1cc206e Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 17 Feb 2010 12:21:45 -0800
Subject: [PATCH 519/640] Input: add KEY_RFKILL

Most laptops have keys that are intended to toggle all device state, not
just wifi. These are currently generally mapped to KEY_WLAN. As a result,
rfkill will only kill or enable wifi in response to the key press. This
confuses users and can make it difficult for them to enable bluetooth
and wwan devices.

This patch adds a new keycode, KEY_RFKILL. It indicates that the system
should toggle the state of all rfkillable devices.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/input.h b/include/linux/input.h
index 735ceaf1bc2d..663208afb64c 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -376,6 +376,7 @@ struct input_absinfo {
 #define KEY_DISPLAY_OFF		245	/* display device to off state */
 
 #define KEY_WIMAX		246
+#define KEY_RFKILL		247	/* Key that controls all radios */
 
 /* Range 248 - 255 is reserved for special needs of AT keyboard driver */
 

From 6c09f09d44690d341d970559b64779bef8b9236b Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 16 Feb 2010 07:57:43 +0100
Subject: [PATCH 520/640] ARM: 5938/1: ARM: L2: export outer_cache_fns

The 'outer_cache' variable is needed by the outer_inv_range(),
outer_clean_range() and outer_flush_range() functions, which are
declared as inline in asm/cacheflush.h.  Otherwise drivers built
as a loadable module, which access these functions, will have
an undefined symbol.

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c6c57b640b6b..621acad8ea43 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -102,6 +102,7 @@ struct cpu_cache_fns cpu_cache;
 #endif
 #ifdef CONFIG_OUTER_CACHE
 struct outer_cache_fns outer_cache;
+EXPORT_SYMBOL(outer_cache);
 #endif
 
 struct stack {

From f8b55f251012e104093e105483c45c5d85ad3040 Mon Sep 17 00:00:00 2001
From: Christine Caulfield <christine.caulfield@googlemail.com>
Date: Thu, 18 Feb 2010 11:33:13 +0000
Subject: [PATCH 521/640] Orphan DECnet

Due to lack of time, space, motivation, hardware and probably expertise,
I have reluctantly decided to orphan the DECnet code in the kernel.

Judging by the deafening silence on the linux-decnet mailing list I
suspect it's either not being used anyway, or the few people that are
using it are happy with their older kernels.

Signed-off-by: Christine Caulfield <christine.caulfield@googlemail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 412eff60c33d..8ed3d0a8b3f4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1733,10 +1733,9 @@ F:	include/linux/tfrc.h
 F:	net/dccp/
 
 DECnet NETWORK LAYER
-M:	Christine Caulfield <christine.caulfield@googlemail.com>
 W:	http://linux-decnet.sourceforge.net
 L:	linux-decnet-user@lists.sourceforge.net
-S:	Maintained
+S:	Orphan
 F:	Documentation/networking/decnet.txt
 F:	net/decnet/
 

From b857df1acc634b18db1db2a40864af985100266e Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 18 Feb 2010 18:07:18 +0100
Subject: [PATCH 522/640] ARM: 5944/1: scsi: fix timer setup in fas216.c

mod_timer() takes an absolute time and not a delay as its argument.

Cc: <stable@kernel.org>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/scsi/arm/fas216.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c
index 477542602284..9e71ac611146 100644
--- a/drivers/scsi/arm/fas216.c
+++ b/drivers/scsi/arm/fas216.c
@@ -2516,7 +2516,7 @@ int fas216_eh_device_reset(struct scsi_cmnd *SCpnt)
 		if (info->scsi.phase == PHASE_IDLE)
 			fas216_kick(info);
 
-		mod_timer(&info->eh_timer, 30 * HZ);
+		mod_timer(&info->eh_timer, jiffies + 30 * HZ);
 		spin_unlock_irqrestore(&info->host_lock, flags);
 
 		/*

From 079e1091a2901c81fc2d7ad2079344c3edab2c4d Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Thu, 18 Feb 2010 21:54:11 +0200
Subject: [PATCH 523/640] Gemini: wrong registers used to set reg_level in
 gpio_set_irq_type()

It appears the wrong GPIO registers were used

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
---
 arch/arm/mach-gemini/gpio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c
index e7263854bc7b..fe3bd5ac8b10 100644
--- a/arch/arm/mach-gemini/gpio.c
+++ b/arch/arm/mach-gemini/gpio.c
@@ -86,7 +86,7 @@ static int gpio_set_irq_type(unsigned int irq, unsigned int type)
 	unsigned int reg_both, reg_level, reg_type;
 
 	reg_type = __raw_readl(base + GPIO_INT_TYPE);
-	reg_level = __raw_readl(base + GPIO_INT_BOTH_EDGE);
+	reg_level = __raw_readl(base + GPIO_INT_LEVEL);
 	reg_both = __raw_readl(base + GPIO_INT_BOTH_EDGE);
 
 	switch (type) {
@@ -117,7 +117,7 @@ static int gpio_set_irq_type(unsigned int irq, unsigned int type)
 	}
 
 	__raw_writel(reg_type, base + GPIO_INT_TYPE);
-	__raw_writel(reg_level, base + GPIO_INT_BOTH_EDGE);
+	__raw_writel(reg_level, base + GPIO_INT_LEVEL);
 	__raw_writel(reg_both, base + GPIO_INT_BOTH_EDGE);
 
 	gpio_ack_irq(irq);

From 083c88fcf1a89986ffa160826f96509fb4b370bb Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Date: Thu, 18 Feb 2010 21:54:12 +0200
Subject: [PATCH 524/640] MAINTAINERS: fix my e-mail and status for Gemini and
 FA526

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
---
 MAINTAINERS | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 412eff60c33d..44c669d92a49 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -616,10 +616,10 @@ M:	Richard Purdie <rpurdie@rpsys.net>
 S:	Maintained
 
 ARM/CORTINA SYSTEMS GEMINI ARM ARCHITECTURE
-M:	Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+M:	Paulius Zaleckas <paulius.zaleckas@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://gitorious.org/linux-gemini/mainline.git
-S:	Maintained
+S:	Odd Fixes
 F:	arch/arm/mach-gemini/
 
 ARM/EBSA110 MACHINE SUPPORT
@@ -641,9 +641,9 @@ T:	topgit git://git.openezx.org/openezx.git
 F:	arch/arm/mach-pxa/ezx.c
 
 ARM/FARADAY FA526 PORT
-M:	Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+M:	Paulius Zaleckas <paulius.zaleckas@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:	Maintained
+S:	Odd Fixes
 F:	arch/arm/mm/*-fa*
 
 ARM/FOOTBRIDGE ARCHITECTURE

From c1db53b36633e6a7511dbec7c372f01a31528f0c Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Tue, 9 Feb 2010 20:16:03 -0300
Subject: [PATCH 525/640] V4L/DVB: dvb: l64781.ko broken with gcc 4.5

I'm trying to fix it on the GCC side (PR43007), but the module is
quite stupid in using ULL constants to operate on u32 values:

static int apply_frontend_param (struct dvb_frontend* fe, struct
dvb_frontend_parameters *param)
{
...
 static const u32 ppm = 8000;
 u32 spi_bias;
...

 spi_bias *= 1000ULL;
 spi_bias /= 1000ULL + ppm/1000;

which causes current GCC 4.5 to emit calls to __udivdi3 for i?86 again.

This patch fixes this issue.

Signed-off-by: Richard Guenther <rguenther@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
CC: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/frontends/l64781.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/dvb/frontends/l64781.c b/drivers/media/dvb/frontends/l64781.c
index 3051b64aa17c..445fa1068064 100644
--- a/drivers/media/dvb/frontends/l64781.c
+++ b/drivers/media/dvb/frontends/l64781.c
@@ -192,8 +192,8 @@ static int apply_frontend_param (struct dvb_frontend* fe, struct dvb_frontend_pa
 	spi_bias *= qam_tab[p->constellation];
 	spi_bias /= p->code_rate_HP + 1;
 	spi_bias /= (guard_tab[p->guard_interval] + 32);
-	spi_bias *= 1000ULL;
-	spi_bias /= 1000ULL + ppm/1000;
+	spi_bias *= 1000;
+	spi_bias /= 1000 + ppm/1000;
 	spi_bias *= p->code_rate_HP;
 
 	val0x04 = (p->transmission_mode << 2) | p->guard_interval;

From fc4a7f93087a48619005111895dcaa115f807399 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 10 Feb 2010 23:57:17 -0300
Subject: [PATCH 526/640] V4L/DVB: cxusb: Select all required frontend and
 tuner modules

cxusb uses the atbm8830 and lgs8gxx (not lgs8gl5) frontends and the
max2165 tuner, so it needs to select them.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Cc: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-usb/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig
index 1b249897c9fb..465295b1d14b 100644
--- a/drivers/media/dvb/dvb-usb/Kconfig
+++ b/drivers/media/dvb/dvb-usb/Kconfig
@@ -112,11 +112,13 @@ config DVB_USB_CXUSB
 	select DVB_MT352 if !DVB_FE_CUSTOMISE
 	select DVB_ZL10353 if !DVB_FE_CUSTOMISE
 	select DVB_DIB7000P if !DVB_FE_CUSTOMISE
-	select DVB_LGS8GL5 if !DVB_FE_CUSTOMISE
 	select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+	select DVB_ATBM8830 if !DVB_FE_CUSTOMISE
+	select DVB_LGS8GXX if !DVB_FE_CUSTOMISE
 	select MEDIA_TUNER_SIMPLE if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
 	select MEDIA_TUNER_MXL5005S if !MEDIA_TUNER_CUSTOMISE
+	select MEDIA_TUNER_MAX2165 if !MEDIA_TUNER_CUSTOMISE
 	help
 	  Say Y here to support the Conexant USB2.0 hybrid reference design.
 	  Currently, only DVB and ATSC modes are supported, analog mode

From 2b59125b1b5f8c9bb0524b8a0bdad4b780a239ac Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Tue, 2 Feb 2010 13:17:54 +0900
Subject: [PATCH 527/640] soc-camera: mt9t112: modify exiting conditions from
 standby mode

This polling is needed if camera is in standby mode, but current exiting
condition is inverted.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/mt9t112.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/video/mt9t112.c b/drivers/media/video/mt9t112.c
index fc4dd6045720..7438f8d775ba 100644
--- a/drivers/media/video/mt9t112.c
+++ b/drivers/media/video/mt9t112.c
@@ -514,7 +514,7 @@ static int mt9t112_init_pll(const struct i2c_client *client)
 	/* poll to verify out of standby. Must Poll this bit */
 	for (i = 0; i < 100; i++) {
 		mt9t112_reg_read(data, client, 0x0018);
-		if (0x4000 & data)
+		if (!(0x4000 & data))
 			break;
 
 		mdelay(10);

From 53f68607caba85db9a73846ccd289e4b7fa96295 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@gmail.com>
Date: Thu, 11 Feb 2010 10:50:31 -0300
Subject: [PATCH 528/640] V4L/DVB: Video : pwc : Fix regression in
 pwc_set_shutter_speed caused by bad 	constant => sizeof conversion.

Regression was caused by my commit 6b35ca0d3d586b8ecb8396821af21186e20afaf0
which determined message size using sizeof rather than hardcoded constants.

Unfortunately pwc_set_shutter_speed reuses a 2 byte buffer for a one byte
message too so the sizeof was bogus in this case.

All other uses of sizeof checked and are ok.

Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: stable@kernel.org
Signed-off-by: Martin Fuzzey <mfuzzey@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/pwc/pwc-ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/video/pwc/pwc-ctrl.c b/drivers/media/video/pwc/pwc-ctrl.c
index 50b415e07eda..f7f7e04cf485 100644
--- a/drivers/media/video/pwc/pwc-ctrl.c
+++ b/drivers/media/video/pwc/pwc-ctrl.c
@@ -753,7 +753,7 @@ int pwc_set_shutter_speed(struct pwc_device *pdev, int mode, int value)
 		buf[0] = 0xff; /* fixed */
 
 	ret = send_control_msg(pdev,
-		SET_LUM_CTL, SHUTTER_MODE_FORMATTER, &buf, sizeof(buf));
+		SET_LUM_CTL, SHUTTER_MODE_FORMATTER, &buf, 1);
 
 	if (!mode && ret >= 0) {
 		if (value < 0)

From 2434466432464110b5307757e0285dd41f15512e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Feb 2010 00:18:41 -0300
Subject: [PATCH 529/640] V4L/DVB: bttv: Move I2C IR initialization

Move I2C IR initialization from just after I2C bus setup to right
before non-I2C IR initialization. This avoids the case where an I2C IR
device is blocking audio support (at least the PV951 suffers from
this). It is also more logical to group IR support together,
regardless of the connectivity.

This fixes bug #15184:
http://bugzilla.kernel.org/show_bug.cgi?id=15184

Signed-off-by: Jean Delvare <khali@linux-fr.org>
CC: stable@kernel.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/bt8xx/bttv-driver.c | 1 +
 drivers/media/video/bt8xx/bttv-i2c.c    | 8 ++++++--
 drivers/media/video/bt8xx/bttvp.h       | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 3182a406bdd1..ae08b077fd04 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -4461,6 +4461,7 @@ static int __devinit bttv_probe(struct pci_dev *dev,
 		request_modules(btv);
 	}
 
+	init_bttv_i2c_ir(btv);
 	bttv_input_init(btv);
 
 	/* everything is fine */
diff --git a/drivers/media/video/bt8xx/bttv-i2c.c b/drivers/media/video/bt8xx/bttv-i2c.c
index 63aa31a041e8..407fa61e4cda 100644
--- a/drivers/media/video/bt8xx/bttv-i2c.c
+++ b/drivers/media/video/bt8xx/bttv-i2c.c
@@ -388,7 +388,12 @@ int __devinit init_bttv_i2c(struct bttv *btv)
 	if (0 == btv->i2c_rc && i2c_scan)
 		do_i2c_scan(btv->c.v4l2_dev.name, &btv->i2c_client);
 
-	/* Instantiate the IR receiver device, if present */
+	return btv->i2c_rc;
+}
+
+/* Instantiate the I2C IR receiver device, if present */
+void __devinit init_bttv_i2c_ir(struct bttv *btv)
+{
 	if (0 == btv->i2c_rc) {
 		struct i2c_board_info info;
 		/* The external IR receiver is at i2c address 0x34 (0x35 for
@@ -408,7 +413,6 @@ int __devinit init_bttv_i2c(struct bttv *btv)
 		strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
 		i2c_new_probed_device(&btv->c.i2c_adap, &info, addr_list);
 	}
-	return btv->i2c_rc;
 }
 
 int __devexit fini_bttv_i2c(struct bttv *btv)
diff --git a/drivers/media/video/bt8xx/bttvp.h b/drivers/media/video/bt8xx/bttvp.h
index a1d0e9c9f286..6cccc2a17eee 100644
--- a/drivers/media/video/bt8xx/bttvp.h
+++ b/drivers/media/video/bt8xx/bttvp.h
@@ -279,6 +279,7 @@ extern unsigned int bttv_debug;
 extern unsigned int bttv_gpio;
 extern void bttv_gpio_tracking(struct bttv *btv, char *comment);
 extern int init_bttv_i2c(struct bttv *btv);
+extern void init_bttv_i2c_ir(struct bttv *btv);
 extern int fini_bttv_i2c(struct bttv *btv);
 
 #define bttv_printk if (bttv_verbose) printk

From 6f6ef82cc9de24153ba7d5cedab5970e276aefa1 Mon Sep 17 00:00:00 2001
From: Carlos Corbacho <carlos@strangeworlds.co.uk>
Date: Sat, 26 Dec 2009 19:24:31 +0000
Subject: [PATCH 530/640] acer-wmi: Respect current backlight level when
 loading

Set the backlight to use the current brightness when loaded, rather than
always resetting the backlight to maximum brightness.

Fixes kernel bugzilla #14207

Signed-off-by: Carlos Corbacho <carlos@strangeworlds.co.uk>
Reported-by: Denis Mukhin <denis_mukhin@yahoo.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/platform/x86/acer-wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 07d14dfdf0b4..226b3e93498c 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -934,7 +934,7 @@ static int __devinit acer_backlight_init(struct device *dev)
 	acer_backlight_device = bd;
 
 	bd->props.power = FB_BLANK_UNBLANK;
-	bd->props.brightness = max_brightness;
+	bd->props.brightness = read_brightness(bd);
 	bd->props.max_brightness = max_brightness;
 	backlight_update_status(bd);
 	return 0;

From 455c0d71d46e86b0b7ff2c9dcfc19bc162302ee9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Thu, 18 Feb 2010 10:28:20 -0800
Subject: [PATCH 531/640] ACPI: Fix regression where _PPC is not read at boot
 even when ignore_ppc=0

Earlier, Ingo Molnar posted a patch to make it so that the kernel would avoid
reading _PPC on his broken T60.  Unfortunately, it seems that with Thomas
Renninger's patch last July to eliminate _PPC evaluations when the processor
driver loads, the kernel never actually reads _PPC at all!  This is problematic
if you happen to boot your non-T60 computer in a state where the BIOS _wants_
_PPC to be something other than zero.

So, put the _PPC evaluation back into acpi_processor_get_performance_info if
ignore_ppc isn't 1.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_perflib.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 2cabadcc4d8c..a959f6a07508 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -413,7 +413,11 @@ static int acpi_processor_get_performance_info(struct acpi_processor *pr)
 	if (result)
 		goto update_bios;
 
-	return 0;
+	/* We need to call _PPC once when cpufreq starts */
+	if (ignore_ppc != 1)
+		result = acpi_processor_get_platform_limit(pr);
+
+	return result;
 
 	/*
 	 * Having _PPC but missing frequencies (_PSS, _PCT) is a very good hint that

From ac278a9c505092dd82077a2446af8f9fc0d9c095 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Tue, 16 Feb 2010 18:09:36 +0000
Subject: [PATCH 532/640] fix LOOKUP_FOLLOW on automount "symlinks"

Make sure that automount "symlinks" are followed regardless of LOOKUP_FOLLOW;
it should have no effect on them.

Cc: stable@kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index d62fdc875f22..a4855af776a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -822,6 +822,17 @@ fail:
 	return PTR_ERR(dentry);
 }
 
+/*
+ * This is a temporary kludge to deal with "automount" symlinks; proper
+ * solution is to trigger them on follow_mount(), so that do_lookup()
+ * would DTRT.  To be killed before 2.6.34-final.
+ */
+static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
+{
+	return inode && unlikely(inode->i_op->follow_link) &&
+		((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
+}
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
@@ -942,8 +953,7 @@ last_component:
 		if (err)
 			break;
 		inode = next.dentry->d_inode;
-		if ((lookup_flags & LOOKUP_FOLLOW)
-		    && inode && inode->i_op->follow_link) {
+		if (follow_on_final(inode, lookup_flags)) {
 			err = do_follow_link(&next, nd);
 			if (err)
 				goto return_err;

From 7fee4868be91e71a3ee8e57289ebf5e10a12297e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 14 Jan 2010 01:03:28 -0500
Subject: [PATCH 533/640] Switch proc/self to nd_set_link()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index e42bbd843ed1..58324c299165 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2369,16 +2369,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
 	pid_t tgid = task_tgid_nr_ns(current, ns);
-	char tmp[PROC_NUMBUF];
-	if (!tgid)
-		return ERR_PTR(-ENOENT);
-	sprintf(tmp, "%d", task_tgid_nr_ns(current, ns));
-	return ERR_PTR(vfs_follow_link(nd,tmp));
+	char *name = ERR_PTR(-ENOENT);
+	if (tgid) {
+		name = __getname();
+		if (!name)
+			name = ERR_PTR(-ENOMEM);
+		else
+			sprintf(name, "%d", tgid);
+	}
+	nd_set_link(nd, name);
+	return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+				void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		__putname(s);
 }
 
 static const struct inode_operations proc_self_inode_operations = {
 	.readlink	= proc_self_readlink,
 	.follow_link	= proc_self_follow_link,
+	.put_link	= proc_self_put_link,
 };
 
 /*

From 4e70af56319e56423d6eb1ce25fc321cdf8cd41d Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Mon, 15 Feb 2010 11:16:11 +0000
Subject: [PATCH 534/640] fs: inode - remove 8 bytes of padding on 64bits
 allowing 1 more objects/slab under slub

This removes 8 bytes of padding from struct inode on 64bit builds, and
so allows 1 more object/slab in the inode_cache when using slub.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
----
patch against 2.6.33-rc8
compiled & tested on x86_64 AMDX2

I've been running this patch for over a week with no obvious problems
regards
Richard
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1bcb275b596..ebb1cd5bc241 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,6 +729,7 @@ struct inode {
 	uid_t			i_uid;
 	gid_t			i_gid;
 	dev_t			i_rdev;
+	unsigned int		i_blkbits;
 	u64			i_version;
 	loff_t			i_size;
 #ifdef __NEED_I_SIZE_ORDERED
@@ -738,7 +739,6 @@ struct inode {
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	blkcnt_t		i_blocks;
-	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */

From 5e2f75b8993a0d83d469388b50716dd5551f2eb4 Mon Sep 17 00:00:00 2001
From: Dan Halperin <dhalperi@cs.washington.edu>
Date: Thu, 18 Feb 2010 22:01:39 -0800
Subject: [PATCH 535/640] iwlwifi: set HT flags after channel in rxon

The HT extension channel settings require priv->staging_rxon.channel to be
accurate. However, iwl_set_rxon_ht was being called before iwl_set_rxon_channel
and thus HT40 could be broken unless another call to iwl_mac_config came in.

This problem was recently introduced by "iwlwifi: Fix to set correct ht
configuration"

The particular setting in which I noticed this was monitor mode:

	iwconfig wlan0 mode monitor
	ifconfig wlan0 up
	./iw wlan0 set channel 64 HT40-
	#./iw wlan0 set channel 64 HT40-
	tcpdump -i wlan0 -y IEEE802_11_RADIO

would only catch HT40 packets if I issued the IW command twice.

From visual inspection, iwl_set_rxon_channel does not depend on
iwl_set_rxon_ht, so simply swapping them should be safe and fixes this problem.

Signed-off-by: Daniel Halperin <dhalperi@cs.washington.edu>
Acked-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
CC: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index d10bea64fce3..f36f804804fc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -2744,8 +2744,8 @@ int iwl_mac_config(struct ieee80211_hw *hw, u32 changed)
 		if ((le16_to_cpu(priv->staging_rxon.channel) != ch))
 			priv->staging_rxon.flags = 0;
 
-		iwl_set_rxon_ht(priv, ht_conf);
 		iwl_set_rxon_channel(priv, conf->channel);
+		iwl_set_rxon_ht(priv, ht_conf);
 
 		iwl_set_flags_for_band(priv, conf->channel->band);
 		spin_unlock_irqrestore(&priv->lock, flags);

From b3dc1a212e5167984616445990c76056034f8eeb Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Thu, 11 Feb 2010 18:01:50 +0100
Subject: [PATCH 536/640] [SCSI] megaraid_sas: fix for 32bit apps

It looks like this patch -

commit 7b2519afa1abd1b9f63aa1e90879307842422dae
Author: Yang, Bo <Bo.Yang@lsi.com>
Date:   Tue Oct 6 14:52:20 2009 -0600

    [SCSI] megaraid_sas: fix 64 bit sense pointer truncation

has caused a problem for 32bit programs with 64bit os -

http://bugzilla.kernel.org/show_bug.cgi?id=15001

fix by converting the user space 32bit pointer to a 64 bit one when
needed.

[jejb: fix up some 64 bit warnings]
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Cc: Bo Yang <Bo.Yang@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/megaraid/megaraid_sas.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 708ea3157b60..d9b8ca5116bc 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -3781,6 +3781,7 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg)
 	    compat_alloc_user_space(sizeof(struct megasas_iocpacket));
 	int i;
 	int error = 0;
+	compat_uptr_t ptr;
 
 	if (clear_user(ioc, sizeof(*ioc)))
 		return -EFAULT;
@@ -3793,9 +3794,22 @@ static int megasas_mgmt_compat_ioctl_fw(struct file *file, unsigned long arg)
 	    copy_in_user(&ioc->sge_count, &cioc->sge_count, sizeof(u32)))
 		return -EFAULT;
 
-	for (i = 0; i < MAX_IOCTL_SGE; i++) {
-		compat_uptr_t ptr;
+	/*
+	 * The sense_ptr is used in megasas_mgmt_fw_ioctl only when
+	 * sense_len is not null, so prepare the 64bit value under
+	 * the same condition.
+	 */
+	if (ioc->sense_len) {
+		void __user **sense_ioc_ptr =
+			(void __user **)(ioc->frame.raw + ioc->sense_off);
+		compat_uptr_t *sense_cioc_ptr =
+			(compat_uptr_t *)(cioc->frame.raw + cioc->sense_off);
+		if (get_user(ptr, sense_cioc_ptr) ||
+		    put_user(compat_ptr(ptr), sense_ioc_ptr))
+			return -EFAULT;
+	}
 
+	for (i = 0; i < MAX_IOCTL_SGE; i++) {
 		if (get_user(ptr, &cioc->sgl[i].iov_base) ||
 		    put_user(compat_ptr(ptr), &ioc->sgl[i].iov_base) ||
 		    copy_in_user(&ioc->sgl[i].iov_len,

From 2cc9116c2b37c525965d76a3e6def38913259427 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@redhat.com>
Date: Tue, 16 Feb 2010 16:18:37 -0500
Subject: [PATCH 537/640] vgaarb: fix "target=default" passing

Commit 77c1ff3982c6b36961725dd19e872a1c07df7f3b fixed the userspace
pointer dereference, but introduced another bug pointed out by Eugene Teo
in RH bug #564264. Instead of comparing the point we were at in the string,
we instead compared the beginning of the string to "default".

Signed-off-by: Kyle McMartin <kyle@redhat.com>
Reported-by: Eugene Teo <eteo@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/vga/vgaarb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 24b56dc54597..2f6cf69ecb39 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -961,7 +961,7 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf,
 		remaining -= 7;
 		pr_devel("client 0x%p called 'target'\n", priv);
 		/* if target is default */
-		if (!strncmp(kbuf, "default", 7))
+		if (!strncmp(curr_pos, "default", 7))
 			pdev = pci_dev_get(vga_default_device());
 		else {
 			if (!vga_pci_str_to_vars(curr_pos, remaining,

From c86a90383638fa830c32cf086a1520be72167086 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Thu, 18 Feb 2010 14:14:58 -0500
Subject: [PATCH 538/640] drm/radeon/kms/rs600: add connector quirk

rs600 board lists DVI port as HDMI.

Fixes fdo bug 26605

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_atombios.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 2dcda6115874..4d8831548a5f 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -206,6 +206,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
 			*connector_type = DRM_MODE_CONNECTOR_DVID;
 	}
 
+	/* Asrock RS600 board lists the DVI port as HDMI */
+	if ((dev->pdev->device == 0x7941) &&
+	    (dev->pdev->subsystem_vendor == 0x1849) &&
+	    (dev->pdev->subsystem_device == 0x7941)) {
+		if ((*connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+		    (supported_device == ATOM_DEVICE_DFP3_SUPPORT))
+			*connector_type = DRM_MODE_CONNECTOR_DVID;
+	}
+
 	/* a-bit f-i90hd - ciaranm on #radeonhd - this board has no DVI */
 	if ((dev->pdev->device == 0x7941) &&
 	    (dev->pdev->subsystem_vendor == 0x147b) &&

From d3932d6c475f8307ac66b4ce21563285ec05f6ea Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 19 Feb 2010 02:13:56 -0500
Subject: [PATCH 539/640] drm/radeon/kms: fix shared ddc detection

Just compare the i2c id since the i2c structs
may be slighly different.

Fixes fdo bug 26616.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 238188540017..65f81942f399 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -780,7 +780,7 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
 			 * connected and the DVI port disconnected.  If the edid doesn't
 			 * say HDMI, vice versa.
 			 */
-			if (radeon_connector->shared_ddc && connector_status_connected) {
+			if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
 				struct drm_device *dev = connector->dev;
 				struct drm_connector *list_connector;
 				struct radeon_connector *list_radeon_connector;
@@ -1060,8 +1060,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 			return;
 		}
 		if (radeon_connector->ddc_bus && i2c_bus->valid) {
-			if (memcmp(&radeon_connector->ddc_bus->rec, i2c_bus,
-				    sizeof(struct radeon_i2c_bus_rec)) == 0) {
+			if (radeon_connector->ddc_bus->rec.i2c_id == i2c_bus->i2c_id) {
 				radeon_connector->shared_ddc = true;
 				shared_ddc = true;
 			}

From 6a660f06e8120977b25d30ace354c8f9dc3aff2a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 19 Feb 2010 16:07:02 -0500
Subject: [PATCH 540/640] drm/radeon/rv740: fix backend setup

This patch fixes occlusion queries and rendering errors
on rv740 boards. Hardcoding the backend map is not an optimal
solution, but a better fix is being worked on.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600_cp.c | 9 ++++++---
 drivers/gpu/drm/radeon/rv770.c   | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 6d5a711c2e91..75bcf35a0931 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -1428,9 +1428,12 @@ static void r700_gfx_init(struct drm_device *dev,
 
 	gb_tiling_config |= R600_BANK_SWAPS(1);
 
-	backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
-							dev_priv->r600_max_backends,
-							(0xff << dev_priv->r600_max_backends) & 0xff);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
+		backend_map = 0x28;
+	else
+		backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
+								dev_priv->r600_max_backends,
+								(0xff << dev_priv->r600_max_backends) & 0xff);
 	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
 
 	cc_gc_shader_pipe_config =
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 5943d561fd1e..03021674d097 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -549,9 +549,12 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 
 	gb_tiling_config |= BANK_SWAPS(1);
 
-	backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
-							rdev->config.rv770.max_backends,
-							(0xff << rdev->config.rv770.max_backends) & 0xff);
+	if (rdev->family == CHIP_RV740)
+		backend_map = 0x28;
+	else
+		backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
+								rdev->config.rv770.max_backends,
+								(0xff << rdev->config.rv770.max_backends) & 0xff);
 	gb_tiling_config |= BACKEND_MAP(backend_map);
 
 	cc_gc_shader_pipe_config =

From f0e2f38befa787f0267419082b33e8ac72269d77 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 20 Feb 2010 07:30:15 +1000
Subject: [PATCH 541/640] drm/ttm: fix caching problem on non-PAT systems.

http://bugzilla.kernel.org/show_bug.cgi?id=15328

This fixes a serious regression on AGP/non-PAT systems, where
pages were ending up in the wrong state and slowing down the
whole system.

[airlied: taken this from the bug as the other option is to revert
the change which caused it].

Tested-by: John W. Linville (in bug).
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_tt.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index e2123af7775a..3d47a2c12322 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -196,14 +196,15 @@ EXPORT_SYMBOL(ttm_tt_populate);
 
 #ifdef CONFIG_X86
 static inline int ttm_tt_set_page_caching(struct page *p,
-					  enum ttm_caching_state c_state)
+					  enum ttm_caching_state c_old,
+					  enum ttm_caching_state c_new)
 {
 	int ret = 0;
 
 	if (PageHighMem(p))
 		return 0;
 
-	if (get_page_memtype(p) != -1) {
+	if (c_old != tt_cached) {
 		/* p isn't in the default caching state, set it to
 		 * writeback first to free its current memtype. */
 
@@ -212,16 +213,17 @@ static inline int ttm_tt_set_page_caching(struct page *p,
 			return ret;
 	}
 
-	if (c_state == tt_wc)
+	if (c_new == tt_wc)
 		ret = set_memory_wc((unsigned long) page_address(p), 1);
-	else if (c_state == tt_uncached)
+	else if (c_new == tt_uncached)
 		ret = set_pages_uc(p, 1);
 
 	return ret;
 }
 #else /* CONFIG_X86 */
 static inline int ttm_tt_set_page_caching(struct page *p,
-					  enum ttm_caching_state c_state)
+					  enum ttm_caching_state c_old,
+					  enum ttm_caching_state c_new)
 {
 	return 0;
 }
@@ -254,7 +256,9 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
 	for (i = 0; i < ttm->num_pages; ++i) {
 		cur_page = ttm->pages[i];
 		if (likely(cur_page != NULL)) {
-			ret = ttm_tt_set_page_caching(cur_page, c_state);
+			ret = ttm_tt_set_page_caching(cur_page,
+						      ttm->caching_state,
+						      c_state);
 			if (unlikely(ret != 0))
 				goto out_err;
 		}
@@ -268,7 +272,7 @@ out_err:
 	for (j = 0; j < i; ++j) {
 		cur_page = ttm->pages[j];
 		if (likely(cur_page != NULL)) {
-			(void)ttm_tt_set_page_caching(cur_page,
+			(void)ttm_tt_set_page_caching(cur_page, c_state,
 						      ttm->caching_state);
 		}
 	}

From 7d404c7b5f4c004712bc15ed6e6edd6779842126 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Thu, 18 Feb 2010 13:13:29 +0000
Subject: [PATCH 542/640] drm/radeon/kms: free fence IB if it wasn't emited at
 IB free time

If at IB free time fence wasn't emited that means the IB wasn't
scheduled because an error occured somewhere, thus we can free
then fence and mark the IB as free.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_ring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 694799f6fac1..6579eb4c1f28 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -100,6 +100,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 	if (tmp == NULL) {
 		return;
 	}
+	if (!tmp->fence->emited)
+		radeon_fence_unref(&tmp->fence);
 	mutex_lock(&rdev->ib_pool.mutex);
 	tmp->free = true;
 	mutex_unlock(&rdev->ib_pool.mutex);

From 635f1a31292087a2e99568bf4451c10ee287adaa Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 20 Feb 2010 09:17:18 +1000
Subject: [PATCH 543/640] drm/radeon: bump the UMS driver version number to
 indicate rv740 fix

This lets UMS userspace know the rv740 fix is in. For KMS we can
consider the kernel release to be the v2.0.0 release so we don't need the
bump there.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_drv.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index e13785282a82..c57ad606504d 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -106,9 +106,10 @@
  * 1.29- R500 3D cmd buffer support
  * 1.30- Add support for occlusion queries
  * 1.31- Add support for num Z pipes from GET_PARAM
+ * 1.32- fixes for rv740 setup
  */
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		31
+#define DRIVER_MINOR		32
 #define DRIVER_PATCHLEVEL	0
 
 enum radeon_cp_microcode_version {

From 1f474646fdc36b457606bbcd6a3592e6cbd31ac4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 19 Feb 2010 15:19:52 -0800
Subject: [PATCH 544/640] sparc64: Fix sun4u execute bit check in TSB I-TLB
 load.

Thanks to testcase and report from Brad Spengler:

--------------------
#include <stdio.h>

typedef int (* _wee)(void);

int main(void)
{
        char buf[8] = { '\x81', '\xc7', '\xe0', '\x08', '\x81', '\xe8',
                        '\x00', '\x00' };
        _wee wee;
        printf("%p\n", &buf);
        wee = (_wee)&buf;
        wee();

        return 0;
}
--------------------

TSB I-tlb load code tries to use andcc to check the _PAGE_EXEC_4U bit,
but that's bit 12 so it gets sign extended all the way up to bit 63
and the test nearly always passes as a result.

Use sethi to fix the bug.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/tsb.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 8c91d9b29a2f..db15d123f054 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -191,10 +191,12 @@ tsb_dtlb_load:
 
 tsb_itlb_load:
 	/* Executable bit must be set.  */
-661:	andcc		%g5, _PAGE_EXEC_4U, %g0
-	.section	.sun4v_1insn_patch, "ax"
+661:	sethi		%hi(_PAGE_EXEC_4U), %g4
+	andcc		%g5, %g4, %g0
+	.section	.sun4v_2insn_patch, "ax"
 	.word		661b
 	andcc		%g5, _PAGE_EXEC_4V, %g0
+	nop
 	.previous
 
 	be,pn		%xcc, tsb_do_fault

From 88af182e389097997c5e2a0b42285b3522796759 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 19 Feb 2010 13:22:59 +0000
Subject: [PATCH 545/640] net: Fix sysctl restarts...

Yuck.  It turns out that when we restart sysctls we were restarting
with the values already changed.  Which unfortunately meant that
the second time through we thought there was no change and skipped
all kinds of work, despite the fact that there was indeed a change.

I have fixed this the simplest way possible by restoring the changed
values when we restart the sysctl write.

One of my coworkers spotted this bug when after disabling forwarding
on an interface pings were still forwarded.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c  |  7 ++++++-
 net/ipv6/addrconf.c | 16 ++++++++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 040c4f05b653..26dec2be9615 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1317,14 +1317,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 {
 	int *valp = ctl->data;
 	int val = *valp;
+	loff_t pos = *ppos;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
 		struct net *net = ctl->extra2;
 
 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
-			if (!rtnl_trylock())
+			if (!rtnl_trylock()) {
+				/* Restore the original values before restarting */
+				*valp = val;
+				*ppos = pos;
 				return restart_syscall();
+			}
 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
 				inet_forward_change(net);
 			} else if (*valp) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index de7a194a64ab..143791da062c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -502,8 +502,11 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 	if (p == &net->ipv6.devconf_dflt->forwarding)
 		return 0;
 
-	if (!rtnl_trylock())
+	if (!rtnl_trylock()) {
+		/* Restore the original values before restarting */
+		*p = old;
 		return restart_syscall();
+	}
 
 	if (p == &net->ipv6.devconf_all->forwarding) {
 		__s32 newf = net->ipv6.devconf_all->forwarding;
@@ -4028,12 +4031,15 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
 {
 	int *valp = ctl->data;
 	int val = *valp;
+	loff_t pos = *ppos;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
 	if (write)
 		ret = addrconf_fixup_forwarding(ctl, valp, val);
+	if (ret)
+		*ppos = pos;
 	return ret;
 }
 
@@ -4075,8 +4081,11 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
 	if (p == &net->ipv6.devconf_dflt->disable_ipv6)
 		return 0;
 
-	if (!rtnl_trylock())
+	if (!rtnl_trylock()) {
+		/* Restore the original values before restarting */
+		*p = old;
 		return restart_syscall();
+	}
 
 	if (p == &net->ipv6.devconf_all->disable_ipv6) {
 		__s32 newf = net->ipv6.devconf_all->disable_ipv6;
@@ -4095,12 +4104,15 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
 {
 	int *valp = ctl->data;
 	int val = *valp;
+	loff_t pos = *ppos;
 	int ret;
 
 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
 	if (write)
 		ret = addrconf_disable_ipv6(ctl, valp, val);
+	if (ret)
+		*ppos = pos;
 	return ret;
 }
 

From b8afe6416101549e877f8470f2a160df69676166 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 19 Feb 2010 13:23:47 +0000
Subject: [PATCH 546/640] net-sysfs: Use rtnl_trylock in wireless sysfs
 methods.

The wireless sysfs methods like the rest of the networking sysfs
methods are removed with the rtnl_lock held and block until
the existing methods stop executing.  So use rtnl_trylock
and restart_syscall so that the code continues to work.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fbc1c7472c5e..099c753c4213 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -410,7 +410,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
 	const struct iw_statistics *iw;
 	ssize_t ret = -EINVAL;
 
-	rtnl_lock();
+	if (!rtnl_trylock())
+		return restart_syscall();
 	if (dev_isalive(dev)) {
 		iw = get_wireless_stats(dev);
 		if (iw)

From e0bf54c93a15c365a37cfc4fe0137f5bc012d1b9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 19 Feb 2010 13:29:27 +0000
Subject: [PATCH 547/640] sfc: Fix sign of efx_mcdi_poll_reboot() error in
 efx_mcdi_poll()

efx_mcdi_poll() uses positive error numbers, matching the MCDI
protocol.  It must negate the result of efx_mcdi_poll_reboot() which
returns the usual negative error numbers.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sfc/mcdi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c
index 9f035b9f0350..f66b3da6ddff 100644
--- a/drivers/net/sfc/mcdi.c
+++ b/drivers/net/sfc/mcdi.c
@@ -127,7 +127,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 	efx_dword_t reg;
 
 	/* Check for a reboot atomically with respect to efx_mcdi_copyout() */
-	rc = efx_mcdi_poll_reboot(efx);
+	rc = -efx_mcdi_poll_reboot(efx);
 	if (rc)
 		goto out;
 

From 242cc0547f3bcecc0b02ca6f3e9512760185727e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 19 Feb 2010 13:34:03 +0000
Subject: [PATCH 548/640] sfc: SFE4002/SFN4112F: Widen temperature and voltage
 tolerances

The temperature and voltage limits currently set on these boards are
too conservative and will cause the driver to stop the net device
erroneously in some systems.

Based on a review of the chip datasheets and advice from the designer
of these boards:

- Raise the maximum board temperatures to the specified maximum ambient
  temperatures for their PHYs plus the expected temperature bias of the
  board
- Raise the maximum controller temperature to 90 degrees
- Lower the minimum temperatures to 0 degrees
- Widen the voltage tolerances to at least +/- 10%

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sfc/falcon_boards.c | 45 ++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/net/sfc/falcon_boards.c b/drivers/net/sfc/falcon_boards.c
index bf0b96af5334..5712fddd72f2 100644
--- a/drivers/net/sfc/falcon_boards.c
+++ b/drivers/net/sfc/falcon_boards.c
@@ -29,6 +29,15 @@
 #define FALCON_BOARD_SFN4111T 0x51
 #define FALCON_BOARD_SFN4112F 0x52
 
+/* Board temperature is about 15°C above ambient when air flow is
+ * limited. */
+#define FALCON_BOARD_TEMP_BIAS	15
+
+/* SFC4000 datasheet says: 'The maximum permitted junction temperature
+ * is 125°C; the thermal design of the environment for the SFC4000
+ * should aim to keep this well below 100°C.' */
+#define FALCON_JUNC_TEMP_MAX	90
+
 /*****************************************************************************
  * Support for LM87 sensor chip used on several boards
  */
@@ -548,16 +557,16 @@ fail_hwmon:
 static u8 sfe4002_lm87_channel = 0x03; /* use AIN not FAN inputs */
 
 static const u8 sfe4002_lm87_regs[] = {
-	LM87_IN_LIMITS(0, 0x83, 0x91),		/* 2.5V:  1.8V +/- 5% */
-	LM87_IN_LIMITS(1, 0x51, 0x5a),		/* Vccp1: 1.2V +/- 5% */
-	LM87_IN_LIMITS(2, 0xb6, 0xca),		/* 3.3V:  3.3V +/- 5% */
-	LM87_IN_LIMITS(3, 0xb0, 0xc9),		/* 5V:    4.6-5.2V */
-	LM87_IN_LIMITS(4, 0xb0, 0xe0),		/* 12V:   11-14V */
-	LM87_IN_LIMITS(5, 0x44, 0x4b),		/* Vccp2: 1.0V +/- 5% */
-	LM87_AIN_LIMITS(0, 0xa0, 0xb2),		/* AIN1:  1.66V +/- 5% */
-	LM87_AIN_LIMITS(1, 0x91, 0xa1),		/* AIN2:  1.5V +/- 5% */
-	LM87_TEMP_INT_LIMITS(10, 60),		/* board */
-	LM87_TEMP_EXT1_LIMITS(10, 70),		/* Falcon */
+	LM87_IN_LIMITS(0, 0x7c, 0x99),		/* 2.5V:  1.8V +/- 10% */
+	LM87_IN_LIMITS(1, 0x4c, 0x5e),		/* Vccp1: 1.2V +/- 10% */
+	LM87_IN_LIMITS(2, 0xac, 0xd4),		/* 3.3V:  3.3V +/- 10% */
+	LM87_IN_LIMITS(3, 0xac, 0xd4),		/* 5V:    5.0V +/- 10% */
+	LM87_IN_LIMITS(4, 0xac, 0xe0),		/* 12V:   10.8-14V */
+	LM87_IN_LIMITS(5, 0x3f, 0x4f),		/* Vccp2: 1.0V +/- 10% */
+	LM87_AIN_LIMITS(0, 0x98, 0xbb),		/* AIN1:  1.66V +/- 10% */
+	LM87_AIN_LIMITS(1, 0x8a, 0xa9),		/* AIN2:  1.5V +/- 10% */
+	LM87_TEMP_INT_LIMITS(0, 80 + FALCON_BOARD_TEMP_BIAS),
+	LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
 	0
 };
 
@@ -619,14 +628,14 @@ static int sfe4002_init(struct efx_nic *efx)
 static u8 sfn4112f_lm87_channel = 0x03; /* use AIN not FAN inputs */
 
 static const u8 sfn4112f_lm87_regs[] = {
-	LM87_IN_LIMITS(0, 0x83, 0x91),		/* 2.5V:  1.8V +/- 5% */
-	LM87_IN_LIMITS(1, 0x51, 0x5a),		/* Vccp1: 1.2V +/- 5% */
-	LM87_IN_LIMITS(2, 0xb6, 0xca),		/* 3.3V:  3.3V +/- 5% */
-	LM87_IN_LIMITS(4, 0xb0, 0xe0),		/* 12V:   11-14V */
-	LM87_IN_LIMITS(5, 0x44, 0x4b),		/* Vccp2: 1.0V +/- 5% */
-	LM87_AIN_LIMITS(1, 0x91, 0xa1),		/* AIN2:  1.5V +/- 5% */
-	LM87_TEMP_INT_LIMITS(10, 60),		/* board */
-	LM87_TEMP_EXT1_LIMITS(10, 70),		/* Falcon */
+	LM87_IN_LIMITS(0, 0x7c, 0x99),		/* 2.5V:  1.8V +/- 10% */
+	LM87_IN_LIMITS(1, 0x4c, 0x5e),		/* Vccp1: 1.2V +/- 10% */
+	LM87_IN_LIMITS(2, 0xac, 0xd4),		/* 3.3V:  3.3V +/- 10% */
+	LM87_IN_LIMITS(4, 0xac, 0xe0),		/* 12V:   10.8-14V */
+	LM87_IN_LIMITS(5, 0x3f, 0x4f),		/* Vccp2: 1.0V +/- 10% */
+	LM87_AIN_LIMITS(1, 0x8a, 0xa9),		/* AIN2:  1.5V +/- 10% */
+	LM87_TEMP_INT_LIMITS(0, 60 + FALCON_BOARD_TEMP_BIAS),
+	LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
 	0
 };
 

From aeaa5ccd6421fbf9e7ded0ac67b12ea2b9fcf51e Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Mon, 15 Feb 2010 18:07:39 -0500
Subject: [PATCH 549/640] vfs: don't call ima_file_check() unconditionally in
 nfsd_open()

commit 1e41568d7378d1ba8c64ba137b9ddd00b59f893a ("Take ima_path_check()
in nfsd past dentry_open() in nfsd_open()") moved this code back to its
original location but missed the "else".

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/vfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 97d79eff6b7f..8715d194561a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_file_check(*filp, access);
+	else
+		host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:

From d9c4f846997c6d37e4f56907d93f1be022c17c6b Mon Sep 17 00:00:00 2001
From: Samu Onkalo <samu.p.onkalo@nokia.com>
Date: Fri, 19 Feb 2010 23:17:58 -0800
Subject: [PATCH 550/640] Input: polldev can cause crash in case when polling
 disabled

When polled input device is opened and closed and there are no other
users of polled device, the workqueue is created and destroyed in
every open / close operation. It is probable that at some point
dynamic allocation of internal parts of the workqueue cause changes to the
workqueue.

When a work is queued to the workqueue the work struct contains pointers
to the workqueue data. If the workqueue has been changed and the work
has never been queued to the new workqueue, work-struct contains pointers
to the non-existing workqueue. This will cause crash at the work
cancellation during device close since cancellation of a work assumes
that the workqueue exists.

To prevent that, work struct is cleaned up at device close. This keeps
work struct clean for the next use.

Signed-off-by: Samu Onkalo <samu.p.onkalo@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input-polldev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index aa6713b4a988..291d9393d359 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -100,6 +100,12 @@ static void input_close_polled_device(struct input_dev *input)
 	struct input_polled_dev *dev = input_get_drvdata(input);
 
 	cancel_delayed_work_sync(&dev->work);
+	/*
+	 * Clean up work struct to remove references to the workqueue.
+	 * It may be destroyed by the next call. This causes problems
+	 * at next device open-close in case of poll_interval == 0.
+	 */
+	INIT_DELAYED_WORK(&dev->work, dev->work.work.func);
 	input_polldev_stop_workqueue();
 
 	if (dev->close)

From 1c8e170aaa7ba62c0160e96a52e25ad004419109 Mon Sep 17 00:00:00 2001
From: Abdoulaye Walsimou Gaye <walsimou@walsimou.com>
Date: Fri, 19 Feb 2010 12:47:14 +0100
Subject: [PATCH 551/640] ARM: 5950/1: ARM: Fix build error for arm1026ej-s
 processor

This patch fix the below build error for arm1026ej-s processor (IntegratorCP/arm1026ej-s board).
  CC      init/main.o
In file included from include/linux/highmem.h:8,
                 from include/linux/pagemap.h:10,
                 from include/linux/mempolicy.h:62,
                 from init/main.c:52:
arch/arm/include/asm/cacheflush.h:134:2: error: #error Unknown cache maintainence model
make[1]: *** [init/main.o] Erreur 1
make: *** [init] Erreur 2

Signed-off-by: Abdoulaye Walsimou Gaye <walsimou@walsimou.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index c77d2fa1f6e5..8113bb5fb66e 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -42,7 +42,8 @@
 #endif
 
 #if defined(CONFIG_CPU_ARM920T) || defined(CONFIG_CPU_ARM922T) || \
-    defined(CONFIG_CPU_ARM925T) || defined(CONFIG_CPU_ARM1020)
+    defined(CONFIG_CPU_ARM925T) || defined(CONFIG_CPU_ARM1020) || \
+    defined(CONFIG_CPU_ARM1026)
 # define MULTI_CACHE 1
 #endif
 

From 4e10ae11317b238609fc3ec9d50a5dee9473e045 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Sat, 20 Feb 2010 09:41:30 +0100
Subject: [PATCH 552/640] ARM: 5951/1: ARM: fix documentation of the PrimeCell
 bus

This fixes the filepath encoded in <linux/amba/bus.h> and adds
some documentation as to what this bus really means.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/bus.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index ab94335b4bb9..6816be6c3f77 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -1,5 +1,9 @@
 /*
- *  linux/include/asm-arm/hardware/amba.h
+ *  linux/include/amba/bus.h
+ *
+ *  This device type deals with ARM PrimeCells and anything else that
+ *  presents a proper CID (0xB105F00D) at the end of the I/O register
+ *  region or that is derived from a PrimeCell.
  *
  *  Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved.
  *

From 0fa11802e0dcbd4e211a9310500bf52d701b9c1b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 20 Feb 2010 14:16:16 +0000
Subject: [PATCH 553/640] ARM: Update mach-types

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/tools/mach-types | 46 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 5a79fc6ee818..31c2f4c30a95 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Thu Jan 28 22:15:54 2010
+# Last update: Sat Feb 20 14:16:15 2010
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -2257,7 +2257,7 @@ oratisalog		MACH_ORATISALOG		ORATISALOG		2268
 oratismadi		MACH_ORATISMADI		ORATISMADI		2269
 oratisot16		MACH_ORATISOT16		ORATISOT16		2270
 oratisdesk		MACH_ORATISDESK		ORATISDESK		2271
-v2_ca9			MACH_V2P_CA9		V2P_CA9			2272
+vexpress		MACH_VEXPRESS		VEXPRESS		2272
 sintexo			MACH_SINTEXO		SINTEXO			2273
 cm3389			MACH_CM3389		CM3389			2274
 omap3_cio		MACH_OMAP3_CIO		OMAP3_CIO		2275
@@ -2636,3 +2636,45 @@ hw90240			MACH_HW90240		HW90240			2648
 dm365_leopard		MACH_DM365_LEOPARD	DM365_LEOPARD		2649
 mityomapl138		MACH_MITYOMAPL138	MITYOMAPL138		2650
 scat110			MACH_SCAT110		SCAT110			2651
+acer_a1			MACH_ACER_A1		ACER_A1			2652
+cmcontrol		MACH_CMCONTROL		CMCONTROL		2653
+pelco_lamar		MACH_PELCO_LAMAR	PELCO_LAMAR		2654
+rfp43			MACH_RFP43		RFP43			2655
+sk86r0301		MACH_SK86R0301		SK86R0301		2656
+ctpxa			MACH_CTPXA		CTPXA			2657
+epb_arm9_a		MACH_EPB_ARM9_A		EPB_ARM9_A		2658
+guruplug		MACH_GURUPLUG		GURUPLUG		2659
+spear310		MACH_SPEAR310		SPEAR310		2660
+spear320		MACH_SPEAR320		SPEAR320		2661
+robotx			MACH_ROBOTX		ROBOTX			2662
+lsxhl			MACH_LSXHL		LSXHL			2663
+smartlite		MACH_SMARTLITE		SMARTLITE		2664
+cws2			MACH_CWS2		CWS2			2665
+m619			MACH_M619		M619			2666
+smartview		MACH_SMARTVIEW		SMARTVIEW		2667
+lsa_salsa		MACH_LSA_SALSA		LSA_SALSA		2668
+kizbox			MACH_KIZBOX		KIZBOX			2669
+htccharmer		MACH_HTCCHARMER		HTCCHARMER		2670
+guf_neso_lt		MACH_GUF_NESO_LT	GUF_NESO_LT		2671
+pm9g45			MACH_PM9G45		PM9G45			2672
+htcpanther		MACH_HTCPANTHER		HTCPANTHER		2673
+htcpanther_cdma		MACH_HTCPANTHER_CDMA	HTCPANTHER_CDMA		2674
+reb01			MACH_REB01		REB01			2675
+aquila			MACH_AQUILA		AQUILA			2676
+spark_sls_hw2		MACH_SPARK_SLS_HW2	SPARK_SLS_HW2		2677
+sheeva_esata		MACH_ESATA_SHEEVAPLUG	ESATA_SHEEVAPLUG	2678
+surf7x30		MACH_SURF7X30		SURF7X30		2679
+micro2440		MACH_MICRO2440		MICRO2440		2680
+am2440			MACH_AM2440		AM2440			2681
+tq2440			MACH_TQ2440		TQ2440			2682
+lpc2478oem		MACH_LPC2478OEM		LPC2478OEM		2683
+ak880x			MACH_AK880X		AK880X			2684
+cobra3530		MACH_COBRA3530		COBRA3530		2685
+pmppb			MACH_PMPPB		PMPPB			2686
+u6715			MACH_U6715		U6715			2687
+axar1500_sender		MACH_AXAR1500_SENDER	AXAR1500_SENDER		2688
+g30_dvb			MACH_G30_DVB		G30_DVB			2689
+vc088x			MACH_VC088X		VC088X			2690
+mioa702			MACH_MIOA702		MIOA702			2691
+hpmin			MACH_HPMIN		HPMIN			2692
+ak880xak		MACH_AK880XAK		AK880XAK		2693

From 8f9941aeccc318f243ab3fa55aaa17f4c1cb33f9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 19 Feb 2010 18:14:21 +0000
Subject: [PATCH 554/640] CacheFiles: Fix a race in cachefiles_delete_object()
 vs rename

cachefiles_delete_object() can race with rename.  It gets the parent directory
of the object it's asked to delete, then locks it - but rename may have changed
the object's parent between the get and the completion of the lock.

However, if such a circumstance is detected, we abandon our attempt to delete
the object - since it's no longer in the index key path, it won't be seen
again by lookups of that key.  The assumption is that cachefilesd may have
culled it by renaming it to the graveyard for later destruction.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cachefiles/namei.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 14ac4806e291..eeb4986ea7db 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 	dir = dget_parent(object->dentry);
 
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-	ret = cachefiles_bury_object(cache, dir, object->dentry);
+
+	/* we need to check that our parent is _still_ our parent - it may have
+	 * been renamed */
+	if (dir == object->dentry->d_parent) {
+		ret = cachefiles_bury_object(cache, dir, object->dentry);
+	} else {
+		/* it got moved, presumably by cachefilesd culling it, so it's
+		 * no longer in the key path and we can ignore it */
+		mutex_unlock(&dir->d_inode->i_mutex);
+		ret = 0;
+	}
 
 	dput(dir);
 	_leave(" = %d", ret);

From d944d549aa86e08cba080396513234cf048fee1f Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 20 Feb 2010 16:13:29 +0000
Subject: [PATCH 555/640] ARM: allow alignment fault mode to be configured at
 kernel boot

Some glibc versions intentionally create lots of alignment faults in
their gconv code, which if not fixed up, results in segfaults during
boot.  This can prevent systems booting properly.

There is no clear hard-configurable default for this; the desired
default depends on the nature of the userspace which is going to be
booted.

So, provide a way for the alignment fault handler to be configured via
the kernel command line.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/kernel-parameters.txt | 5 +++++
 arch/arm/mm/alignment.c             | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 826b6e148316..e7848a0d99eb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -315,6 +315,11 @@ and is between 256 and 4096 characters. It is defined in the file
 	aic79xx=	[HW,SCSI]
 			See Documentation/scsi/aic79xx.txt.
 
+	alignment=	[KNL,ARM]
+			Allow the default userspace alignment fault handler
+			behaviour to be specified.  Bit 0 enables warnings,
+			bit 1 enables fixups, and bit 2 sends a segfault.
+
 	amd_iommu=	[HW,X86-84]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b270d6228fe2..62820eda84d9 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -11,6 +11,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/moduleparam.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -77,6 +78,8 @@ static unsigned long ai_dword;
 static unsigned long ai_multi;
 static int ai_usermode;
 
+core_param(alignment, ai_usermode, int, 0600);
+
 #define UM_WARN		(1 << 0)
 #define UM_FIXUP	(1 << 1)
 #define UM_SIGNAL	(1 << 2)

From 10fe12ef631a7e85022ed26304a37f033a6a95b8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Feb 2010 19:53:13 -0200
Subject: [PATCH 556/640] perf symbols: Fix up map end too on modular kernels
 with no modules installed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In 2161db9 we stopped failing when not finding modules when
asked too, but then the kernel maps (just one, for vmlinux)
wasn't having its ->end field correctly set up, so symbols were
not being found for the vmlinux map because its range was 0-0.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1266702793-29434-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/event.c  | 6 ++++++
 tools/perf/util/symbol.c | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c3831f633dec..9eb7005bc6d6 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -379,6 +379,12 @@ int event__process_mmap(event_t *self, struct perf_session *session)
 
 			session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
 			session->vmlinux_maps[MAP__FUNCTION]->end   = self->mmap.start + self->mmap.len;
+			/*
+			 * Be a bit paranoid here, some perf.data file came with
+			 * a zero sized synthesized MMAP event for the kernel.
+			 */
+			if (session->vmlinux_maps[MAP__FUNCTION]->end == 0)
+				session->vmlinux_maps[MAP__FUNCTION]->end = ~0UL;
 
 			perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
 								 self->mmap.pgoff);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6882e9fec2d6..ee9c37efdd36 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1937,7 +1937,7 @@ int map_groups__create_kernel_maps(struct map_groups *self,
 		return -1;
 
 	if (symbol_conf.use_modules && map_groups__create_modules(self) < 0)
-		return 0;
+		pr_debug("Problems creating module maps, continuing anyway...\n");
 	/*
 	 * Now that we have all the maps created, just set the ->end of them:
 	 */

From faa5c5c36ec50bf43e39c7798ce9701e6b002db3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Feb 2010 23:02:07 -0200
Subject: [PATCH 557/640] perf tools: Don't use parent comm if not set at fork
 time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the parent comm then is worthless, confusing users about the
thread where the sample really happened, leading to think that
the sample happened in the parent, not where it really happened,
in the children of a thread for which a PERF_RECORD_COMM event
was not received.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1266627727-19715-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/thread.c | 18 ++++++++++++------
 tools/perf/util/thread.h |  1 +
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 634b7f7140d5..9e8995eaf2b6 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -36,7 +36,10 @@ int thread__set_comm(struct thread *self, const char *comm)
 	if (self->comm)
 		free(self->comm);
 	self->comm = strdup(comm);
-	return self->comm ? 0 : -ENOMEM;
+	if (self->comm == NULL)
+		return -ENOMEM;
+	self->comm_set = true;
+	return 0;
 }
 
 int thread__comm_len(struct thread *self)
@@ -255,11 +258,14 @@ int thread__fork(struct thread *self, struct thread *parent)
 {
 	int i;
 
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(parent->comm);
-	if (!self->comm)
-		return -ENOMEM;
+	if (parent->comm_set) {
+		if (self->comm)
+			free(self->comm);
+		self->comm = strdup(parent->comm);
+		if (!self->comm)
+			return -ENOMEM;
+		self->comm_set = true;
+	}
 
 	for (i = 0; i < MAP__NR_TYPES; ++i)
 		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 56f317b8a06c..0a28f39de545 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -15,6 +15,7 @@ struct thread {
 	struct map_groups	mg;
 	pid_t			pid;
 	char			shortname[3];
+	bool			comm_set;
 	char			*comm;
 	int			comm_len;
 };

From 87b8d1adefa1548b591cbf0d63965987e2cf893d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 18 Feb 2010 16:13:40 -0800
Subject: [PATCH 558/640] mm: Make copy_from_user() in migrate.c statically
 predictable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

x86-32 has had a static test for copy_on_user() overflow for a while.
This test currently fails in mm/migrate.c resulting in an
allyesconfig/allmodconfig build failure on x86-32:

In function ‘copy_from_user’,
    inlined from ‘do_pages_stat’ at
    /home/hpa/kernel/git/mm/migrate.c:1012:
/home/hpa/kernel/git/arch/x86/include/asm/uaccess_32.h:212: error:
    call to ‘copy_from_user_overflow’ declared

Make the logic more explicit and therefore easier for gcc to
understand.

v2: rewrite the loop entirely using a more normal structure for a
    chunked-data loop (Linus Torvalds)

Reported-by: Len Brown <lenb@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-and-Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Arjan van de Ven <arjan@linux.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 9a0db5bbabe4..880bd592d38e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1002,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 #define DO_PAGES_STAT_CHUNK_NR 16
 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
-	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
-	int err;
 
-	for (i = 0; i < nr_pages; i += chunk_nr) {
-		if (chunk_nr > nr_pages - i)
-			chunk_nr = nr_pages - i;
+	while (nr_pages) {
+		unsigned long chunk_nr;
 
-		err = copy_from_user(chunk_pages, &pages[i],
-				     chunk_nr * sizeof(*chunk_pages));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
+		chunk_nr = nr_pages;
+		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+			break;
 
 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
 
-		err = copy_to_user(&status[i], chunk_status,
-				   chunk_nr * sizeof(*chunk_status));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
-	}
-	err = 0;
+		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+			break;
 
-out:
-	return err;
+		pages += chunk_nr;
+		status += chunk_nr;
+		nr_pages -= chunk_nr;
+	}
+	return nr_pages ? -EFAULT : 0;
 }
 
 /*

From 3dae93ec3ee1fceec69f40ef9b97892ce62ba7a5 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 15 Feb 2010 19:32:25 -0500
Subject: [PATCH 559/640] [WATCHDOG] bfin: fix max timeout calculation

Relying on overflow/wrap around isn't exact because if you wrap far
enough, you get back to "valid" values.

Reported-by: Thorsten Pohlmann <pohlmann@tetronik.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
Cc: stable <stable@kernel.org>
---
 drivers/watchdog/bfin_wdt.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/watchdog/bfin_wdt.c b/drivers/watchdog/bfin_wdt.c
index c7b3f9df2317..2159e668751c 100644
--- a/drivers/watchdog/bfin_wdt.c
+++ b/drivers/watchdog/bfin_wdt.c
@@ -1,9 +1,8 @@
 /*
  * Blackfin On-Chip Watchdog Driver
- *  Supports BF53[123]/BF53[467]/BF54[2489]/BF561
  *
  * Originally based on softdog.c
- * Copyright 2006-2007 Analog Devices Inc.
+ * Copyright 2006-2010 Analog Devices Inc.
  * Copyright 2006-2007 Michele d'Amico
  * Copyright 1996 Alan Cox <alan@lxorguk.ukuu.org.uk>
  *
@@ -137,13 +136,15 @@ static int bfin_wdt_running(void)
  */
 static int bfin_wdt_set_timeout(unsigned long t)
 {
-	u32 cnt;
+	u32 cnt, max_t, sclk;
 	unsigned long flags;
 
-	stampit();
+	sclk = get_sclk();
+	max_t = -1 / sclk;
+	cnt = t * sclk;
+	stamp("maxtimeout=%us newtimeout=%lus (cnt=%#x)", max_t, t, cnt);
 
-	cnt = t * get_sclk();
-	if (cnt < get_sclk()) {
+	if (t > max_t) {
 		printk(KERN_WARNING PFX "timeout value is too large\n");
 		return -EINVAL;
 	}

From 2531be413b3f2f64c0282073de89fe52bbcbbab5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 21 Feb 2010 18:03:16 -0800
Subject: [PATCH 560/640] sparc32: Fix struct stat uid/gid types.

Commit 085219f79cad89291699bd2bfb21c9fdabafe65f
("sparc32: use proper types in struct stat")

Accidently changed the struct stat uid/gid members
to uid_t and gid_t, but those get set to
__kernel_uid32_t and __kernel_gid32_t respectively.
Those are of type 'int' but the structure is meant
to have 'short'.  So use uid16_t and gid16_t to
correct this.

Reported-by: Rob Landley <rob@landley.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/stat.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h
index 55db5eca08e2..39327d6a57eb 100644
--- a/arch/sparc/include/asm/stat.h
+++ b/arch/sparc/include/asm/stat.h
@@ -53,8 +53,8 @@ struct stat {
 	ino_t		st_ino;
 	mode_t		st_mode;
 	short		st_nlink;
-	uid_t		st_uid;
-	gid_t		st_gid;
+	uid16_t		st_uid;
+	gid16_t		st_gid;
 	unsigned short	st_rdev;
 	off_t		st_size;
 	time_t		st_atime;

From eb083ba260f21ad79e83e1ad05a0d27e93b58c83 Mon Sep 17 00:00:00 2001
From: Roy Yin <yhch@generaltouch.com>
Date: Sun, 21 Feb 2010 22:52:49 -0800
Subject: [PATCH 561/640] Input: usbtouchscreen - extend coordinate range for
 Generaltouch devices

Generaltouch protocol allows for coordinates in [0, 0xffff] range and
there are devices reporting coordinates as high as 0x7fff so let's update
the driver to reflect that.

Signed-off-by: Roy Yin <yhch@generaltouch.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/usbtouchscreen.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 09a5e7341bd5..5256123a5228 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -618,8 +618,8 @@ static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
 #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
 static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
 {
-	dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ;
-	dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ;
+	dev->x = (pkt[2] << 8) | pkt[1];
+	dev->y = (pkt[4] << 8) | pkt[3];
 	dev->press = pkt[5] & 0xff;
 	dev->touch = pkt[0] & 0x01;
 
@@ -809,9 +809,9 @@ static struct usbtouch_device_info usbtouch_dev_info[] = {
 #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH
 	[DEVTYPE_GENERAL_TOUCH] = {
 		.min_xc		= 0x0,
-		.max_xc		= 0x0500,
+		.max_xc		= 0x7fff,
 		.min_yc		= 0x0,
-		.max_yc		= 0x0500,
+		.max_yc		= 0x7fff,
 		.rept_size	= 7,
 		.read_data	= general_touch_read_data,
 	},

From a239a8b47cc0e5e6d7416a89f340beac06d5edaa Mon Sep 17 00:00:00 2001
From: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Date: Fri, 19 Feb 2010 15:47:32 -0800
Subject: [PATCH 562/640] iwlwifi: error checking for number of tfds in queue

When receive reply_tx and ready to decrement the count for number of
tfds in queue, do error checking to prevent error condition and
tfds_in_queue become negative number.

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
CC: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-4965.c |  2 +-
 drivers/net/wireless/iwlwifi/iwl-5000.c |  4 ++--
 drivers/net/wireless/iwlwifi/iwl-core.h |  2 ++
 drivers/net/wireless/iwlwifi/iwl-tx.c   | 16 +++++++++++++++-
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index 9b4b8b5c7574..31462813bac0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -2008,7 +2008,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
 			IWL_DEBUG_TX_REPLY(priv, "Retry scheduler reclaim scd_ssn "
 					   "%d index %d\n", scd_ssn , index);
 			freed = iwl_tx_queue_reclaim(priv, txq_id, index);
-			priv->stations[sta_id].tid[tid].tfds_in_queue -= freed;
+			iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
 
 			if (priv->mac80211_registered &&
 			    (iwl_queue_space(&txq->q) > txq->q.low_mark) &&
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index de45f308b744..f27c514886a5 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -1125,7 +1125,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv,
 					scd_ssn , index, txq_id, txq->swq_id);
 
 			freed = iwl_tx_queue_reclaim(priv, txq_id, index);
-			priv->stations[sta_id].tid[tid].tfds_in_queue -= freed;
+			iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
 
 			if (priv->mac80211_registered &&
 			    (iwl_queue_space(&txq->q) > txq->q.low_mark) &&
@@ -1154,7 +1154,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv,
 
 		freed = iwl_tx_queue_reclaim(priv, txq_id, index);
 		if (ieee80211_is_data_qos(tx_resp->frame_ctrl))
-			priv->stations[sta_id].tid[tid].tfds_in_queue -= freed;
+			iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
 
 		if (priv->mac80211_registered &&
 		    (iwl_queue_space(&txq->q) > txq->q.low_mark))
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index 675b7df632fc..fe37d6a6bf97 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -446,6 +446,8 @@ void iwl_hw_txq_ctx_free(struct iwl_priv *priv);
 int iwl_hw_tx_queue_init(struct iwl_priv *priv,
 			 struct iwl_tx_queue *txq);
 int iwl_txq_update_write_ptr(struct iwl_priv *priv, struct iwl_tx_queue *txq);
+void iwl_free_tfds_in_queue(struct iwl_priv *priv,
+			    int sta_id, int tid, int freed);
 int iwl_tx_queue_init(struct iwl_priv *priv, struct iwl_tx_queue *txq,
 		      int slots_num, u32 txq_id);
 void iwl_tx_queue_free(struct iwl_priv *priv, int txq_id);
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 87ce2bd292c7..72136c8f51da 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -120,6 +120,20 @@ int iwl_txq_update_write_ptr(struct iwl_priv *priv, struct iwl_tx_queue *txq)
 EXPORT_SYMBOL(iwl_txq_update_write_ptr);
 
 
+void iwl_free_tfds_in_queue(struct iwl_priv *priv,
+			    int sta_id, int tid, int freed)
+{
+	if (priv->stations[sta_id].tid[tid].tfds_in_queue >= freed)
+		priv->stations[sta_id].tid[tid].tfds_in_queue -= freed;
+	else {
+		IWL_ERR(priv, "free more than tfds_in_queue (%u:%d)\n",
+			priv->stations[sta_id].tid[tid].tfds_in_queue,
+			freed);
+		priv->stations[sta_id].tid[tid].tfds_in_queue = 0;
+	}
+}
+EXPORT_SYMBOL(iwl_free_tfds_in_queue);
+
 /**
  * iwl_tx_queue_free - Deallocate DMA queue.
  * @txq: Transmit queue to deallocate.
@@ -1559,7 +1573,7 @@ void iwl_rx_reply_compressed_ba(struct iwl_priv *priv,
 	if (txq->q.read_ptr != (ba_resp_scd_ssn & 0xff)) {
 		/* calculate mac80211 ampdu sw queue to wake */
 		int freed = iwl_tx_queue_reclaim(priv, scd_flow, index);
-		priv->stations[sta_id].tid[tid].tfds_in_queue -= freed;
+		iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
 
 		if ((iwl_queue_space(&txq->q) > txq->q.low_mark) &&
 		    priv->mac80211_registered &&

From a120e912eb51e347f36c71b60a1d13af74d30e83 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Fri, 19 Feb 2010 15:47:33 -0800
Subject: [PATCH 563/640] iwlwifi: sanity check before counting number of tfds
 can be free

Check the frame control for ieee80211_is_data_qos() is true before
counting the number of tfds can be free, the tfds_in_queue only
increment when ieee80211_is_data_qos() is true before transmit; so it
should only decrement if the type match.

Remove ieee80211_is_data_qos check for frame_ctrl in tx_resp to avoid
invalid information pass from uCode.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
CC: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-5000.c | 6 ++----
 drivers/net/wireless/iwlwifi/iwl-tx.c   | 6 +++++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index f27c514886a5..cffaae772d51 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -1153,16 +1153,14 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv,
 				   tx_resp->failure_frame);
 
 		freed = iwl_tx_queue_reclaim(priv, txq_id, index);
-		if (ieee80211_is_data_qos(tx_resp->frame_ctrl))
-			iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
+		iwl_free_tfds_in_queue(priv, sta_id, tid, freed);
 
 		if (priv->mac80211_registered &&
 		    (iwl_queue_space(&txq->q) > txq->q.low_mark))
 			iwl_wake_queue(priv, txq_id);
 	}
 
-	if (ieee80211_is_data_qos(tx_resp->frame_ctrl))
-		iwl_txq_check_empty(priv, sta_id, tid, txq_id);
+	iwl_txq_check_empty(priv, sta_id, tid, txq_id);
 
 	if (iwl_check_bits(status, TX_ABORT_REQUIRED_MSK))
 		IWL_ERR(priv, "TODO:  Implement Tx ABORT REQUIRED!!!\n");
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 72136c8f51da..8f4071562857 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -1145,6 +1145,7 @@ int iwl_tx_queue_reclaim(struct iwl_priv *priv, int txq_id, int index)
 	struct iwl_queue *q = &txq->q;
 	struct iwl_tx_info *tx_info;
 	int nfreed = 0;
+	struct ieee80211_hdr *hdr;
 
 	if ((index >= q->n_bd) || (iwl_queue_used(q, index) == 0)) {
 		IWL_ERR(priv, "Read index for DMA queue txq id (%d), index %d, "
@@ -1159,13 +1160,16 @@ int iwl_tx_queue_reclaim(struct iwl_priv *priv, int txq_id, int index)
 
 		tx_info = &txq->txb[txq->q.read_ptr];
 		iwl_tx_status(priv, tx_info->skb[0]);
+
+		hdr = (struct ieee80211_hdr *)tx_info->skb[0]->data;
+		if (hdr && ieee80211_is_data_qos(hdr->frame_control))
+			nfreed++;
 		tx_info->skb[0] = NULL;
 
 		if (priv->cfg->ops->lib->txq_inval_byte_cnt_tbl)
 			priv->cfg->ops->lib->txq_inval_byte_cnt_tbl(priv, txq);
 
 		priv->cfg->ops->lib->txq_free_tfd(priv, txq);
-		nfreed++;
 	}
 	return nfreed;
 }

From 61caf87cb5c2a198966018343a6ce4c5ab6cf8df Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 18 Feb 2010 23:06:27 +0100
Subject: [PATCH 564/640] i915 / PM: Fix hibernate regression caused by
 suspend/resume splitting

Commit 84b79f8d2882b0a84330c04839ed4d3cefd2ff77 (drm/i915: Fix crash
while aborting hibernation) attempted to fix a regression introduced
by commit cbda12d77ea590082edb6d30bd342a67ebc459e0 (drm/i915:
implement new pm ops for i915), but it went too far trying to split
the freeze/suspend and resume/thaw parts of the code.  As a result,
it introduced another regression, which only is visible on some systems.

Fix the problem by merging i915_drm_suspend() with
i915_drm_freeze() and moving some code from i915_resume()
into i915_drm_thaw(), so that intel_opregion_free() and
intel_opregion_init() are also executed in the freeze and thaw code
paths, respectively.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-and-tested-by: Pedro Ribeiro <pedrib@gmail.com>
Tested-by: Tino Keitel <tino.keitel@tikei.de>
Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_drv.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 79beffcf5936..cf4cb3e9a0c2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -176,6 +176,8 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
 
 static int i915_drm_freeze(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	pci_save_state(dev->pdev);
 
 	/* If KMS is active, we do the leavevt stuff here */
@@ -191,17 +193,12 @@ static int i915_drm_freeze(struct drm_device *dev)
 
 	i915_save_state(dev);
 
-	return 0;
-}
-
-static void i915_drm_suspend(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	intel_opregion_free(dev, 1);
 
 	/* Modeset on resume, not lid events */
 	dev_priv->modeset_on_lid = 0;
+
+	return 0;
 }
 
 static int i915_suspend(struct drm_device *dev, pm_message_t state)
@@ -221,8 +218,6 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
 	if (error)
 		return error;
 
-	i915_drm_suspend(dev);
-
 	if (state.event == PM_EVENT_SUSPEND) {
 		/* Shut down the device */
 		pci_disable_device(dev->pdev);
@@ -237,6 +232,10 @@ static int i915_drm_thaw(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int error = 0;
 
+	i915_restore_state(dev);
+
+	intel_opregion_init(dev, 1);
+
 	/* KMS EnterVT equivalent */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
@@ -263,10 +262,6 @@ static int i915_resume(struct drm_device *dev)
 
 	pci_set_master(dev->pdev);
 
-	i915_restore_state(dev);
-
-	intel_opregion_init(dev, 1);
-
 	return i915_drm_thaw(dev);
 }
 
@@ -423,8 +418,6 @@ static int i915_pm_suspend(struct device *dev)
 	if (error)
 		return error;
 
-	i915_drm_suspend(drm_dev);
-
 	pci_disable_device(pdev);
 	pci_set_power_state(pdev, PCI_D3hot);
 
@@ -464,13 +457,8 @@ static int i915_pm_poweroff(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	int error;
 
-	error = i915_drm_freeze(drm_dev);
-	if (!error)
-		i915_drm_suspend(drm_dev);
-
-	return error;
+	return i915_drm_freeze(drm_dev);
 }
 
 const struct dev_pm_ops i915_pm_ops = {

From d306ebc28649b89877a22158fe0076f06cc46f60 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Wed, 10 Feb 2010 10:35:31 -0800
Subject: [PATCH 565/640] ACPI: Be in TS_POLLING state during mwait based
 C-state entry

ACPI deep C-state entry had a long standing bug/missing feature, wherein we were sending
resched IPIs when an idle CPU is in mwait based deep C-state. Only mwait based C1 was using
the write to the monitored address to wake up mwait'ing CPU.

This patch changes the code to retain TS_POLLING bit if we are entering an mwait based
deep C-state.

The patch has been verified to reduce the number of resched IPIs in general and also
improves the performance/power on workloads with low system utilization (i.e., when mwait based
deep C-states are being used).

Fixes "netperf ~50% regression with 2.6.33-rc1, bisect to 1b9508f"
http://marc.info/?l=linux-kernel&m=126441481427331&w=4

Reported-by: Lin Ming <ming.m.lin@intel.com>
Tested-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_idle.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e88e8ae04fdb..cc978a8c00b7 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -880,12 +880,14 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 		return(acpi_idle_enter_c1(dev, state));
 
 	local_irq_disable();
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we test
-	 * NEED_RESCHED:
-	 */
-	smp_mb();
+	if (cx->entry_method != ACPI_CSTATE_FFH) {
+		current_thread_info()->status &= ~TS_POLLING;
+		/*
+		 * TS_POLLING-cleared state must be visible before we test
+		 * NEED_RESCHED:
+		 */
+		smp_mb();
+	}
 
 	if (unlikely(need_resched())) {
 		current_thread_info()->status |= TS_POLLING;
@@ -965,12 +967,14 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 	}
 
 	local_irq_disable();
-	current_thread_info()->status &= ~TS_POLLING;
-	/*
-	 * TS_POLLING-cleared state must be visible before we test
-	 * NEED_RESCHED:
-	 */
-	smp_mb();
+	if (cx->entry_method != ACPI_CSTATE_FFH) {
+		current_thread_info()->status &= ~TS_POLLING;
+		/*
+		 * TS_POLLING-cleared state must be visible before we test
+		 * NEED_RESCHED:
+		 */
+		smp_mb();
+	}
 
 	if (unlikely(need_resched())) {
 		current_thread_info()->status |= TS_POLLING;

From 15cbf627abcd93c3c668d5a92d58d9fec8f953dd Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Mon, 22 Feb 2010 10:43:43 -0800
Subject: [PATCH 566/640] Revert "parisc: HAVE_ARCH_TRACEHOOK"

This reverts commit 81bf550d9cdfe0325eb1504b06c9f6511b442c1a.

HAVE_ARCH_TRACEHOOK requires defining the user_regset interfaces,
including task_user_regset_view().  parisc doesn't do that yet,
so don't lie about it.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/parisc/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 524d9352f17e..f388dc68f605 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -18,7 +18,6 @@ config PARISC
 	select BUG
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
-	select HAVE_ARCH_TRACEHOOK
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
 	  in many of their workstations & servers (HP9000 700 and 800 series,

From 52ab320ac560af3333191a473e56615fb48fff95 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Sat, 20 Feb 2010 21:23:22 +0900
Subject: [PATCH 567/640] MIPS: Highmem: Fix build error

arch/mips/mm/highmem.c: In function 'kmap_init':
arch/mips/mm/highmem.c:130: error: 'init_mm' undeclared (first use in this function)
arch/mips/mm/highmem.c:130: error: (Each undeclared identifier is reported only once
arch/mips/mm/highmem.c:130: error: for each function it appears in.)

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Cc: linux-mips <linux-mips@linux-mips.org>
Patchwork: http://patchwork.linux-mips.org/patch/980/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/highmem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
index e274fda329f4..127d732474bf 100644
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/sched.h>
 #include <linux/smp.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>

From 84a6fcb368a080620d12fc4d79e07902dbee7335 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 20 Feb 2010 19:51:20 +0100
Subject: [PATCH 568/640] MIPS: BCM47xx: Fix 128MB RAM support

Ignoring the last page when ddr size is 128M. Cached accesses to last page
is causing the processor to prefetch using address above 128M stepping out
of the DDR address space.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/981/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm47xx/prom.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index c51405e57921..29d3cbf9555f 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c
@@ -141,6 +141,14 @@ static __init void prom_init_mem(void)
 			break;
 	}
 
+	/* Ignoring the last page when ddr size is 128M. Cached
+	 * accesses to last page is causing the processor to prefetch
+	 * using address above 128M stepping out of the ddr address
+	 * space.
+	 */
+	if (mem == 0x8000000)
+		mem -= 0x1000;
+
 	add_memory_region(0, mem, BOOT_MEM_RAM);
 }
 

From e7e65caefd57913260c82fb751f07655671ec47e Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Wed, 10 Feb 2010 19:25:58 +0100
Subject: [PATCH 569/640] drm/nouveau: Fix up pre-nv17 analog load detection.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv04_dac.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index d0e038d28948..1d73b15d70da 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -119,7 +119,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
 						 struct drm_connector *connector)
 {
 	struct drm_device *dev = encoder->dev;
-	uint8_t saved_seq1, saved_pi, saved_rpc1;
+	uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode;
 	uint8_t saved_palette0[3], saved_palette_mask;
 	uint32_t saved_rtest_ctrl, saved_rgen_ctrl;
 	int i;
@@ -135,6 +135,9 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
 		/* only implemented for head A for now */
 		NVSetOwner(dev, 0);
 
+	saved_cr_mode = NVReadVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX);
+	NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode | 0x80);
+
 	saved_seq1 = NVReadVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX);
 	NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1 & ~0x20);
 
@@ -203,6 +206,7 @@ out:
 	NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_PIXEL_INDEX, saved_pi);
 	NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_RPC1_INDEX, saved_rpc1);
 	NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1);
+	NVWriteVgaCrtc(dev, 0, NV_CIO_CR_MODE_INDEX, saved_cr_mode);
 
 	if (blue == 0x18) {
 		NV_INFO(dev, "Load detected on head A\n");

From 66b6ebaccb176a2068bbe328f162614dce524621 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 Feb 2010 10:23:30 +1000
Subject: [PATCH 570/640] drm/nv50: make nv50_mem_vm_{bind,unbind} operate only
 on vram

GART is handled elsewhere, no reason to have the code for it here too.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 43 ++++++++-------------------
 1 file changed, 13 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 8f3a12f614ed..04885d2fb15f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -285,53 +285,36 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
 			uint32_t flags, uint64_t phys)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj **pgt;
-	unsigned psz, pfl, pages;
+	unsigned pages;
 
-	if (virt >= dev_priv->vm_gart_base &&
-	    (virt + size) < (dev_priv->vm_gart_base + dev_priv->vm_gart_size)) {
-		psz = 12;
-		pgt = &dev_priv->gart_info.sg_ctxdma;
-		pfl = 0x21;
-		virt -= dev_priv->vm_gart_base;
-	} else
-	if (virt >= dev_priv->vm_vram_base &&
-	    (virt + size) < (dev_priv->vm_vram_base + dev_priv->vm_vram_size)) {
-		psz = 16;
-		pgt = dev_priv->vm_vram_pt;
-		pfl = 0x01;
-		virt -= dev_priv->vm_vram_base;
-	} else {
-		NV_ERROR(dev, "Invalid address: 0x%16llx-0x%16llx\n",
-			 virt, virt + size - 1);
-		return -EINVAL;
-	}
-
-	pages = size >> psz;
+	virt -= dev_priv->vm_vram_base;
+	pages = size >> 16;
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	if (flags & 0x80000000) {
 		while (pages--) {
-			struct nouveau_gpuobj *pt = pgt[virt >> 29];
-			unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1;
+			struct nouveau_gpuobj *pt =
+				dev_priv->vm_vram_pt[virt >> 29];
+			unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
 
 			nv_wo32(dev, pt, pte++, 0x00000000);
 			nv_wo32(dev, pt, pte++, 0x00000000);
 
-			virt += (1 << psz);
+			virt += (1 << 16);
 		}
 	} else {
 		while (pages--) {
-			struct nouveau_gpuobj *pt = pgt[virt >> 29];
-			unsigned pte = ((virt & 0x1fffffffULL) >> psz) << 1;
+			struct nouveau_gpuobj *pt =
+				dev_priv->vm_vram_pt[virt >> 29];
+			unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
 			unsigned offset_h = upper_32_bits(phys) & 0xff;
 			unsigned offset_l = lower_32_bits(phys);
 
-			nv_wo32(dev, pt, pte++, offset_l | pfl);
+			nv_wo32(dev, pt, pte++, offset_l | 1);
 			nv_wo32(dev, pt, pte++, offset_h | flags);
 
-			phys += (1 << psz);
-			virt += (1 << psz);
+			phys += (1 << 16);
+			virt += (1 << 16);
 		}
 	}
 	dev_priv->engine.instmem.finish_access(dev);

From 4c27bd339d226175ac0e4dc3ab8289ba696db8be Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 Feb 2010 10:25:53 +1000
Subject: [PATCH 571/640] drm/nv50: more efficient clearing of gpu page table
 entries

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 68 +++++++++++++++++----------
 1 file changed, 44 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 04885d2fb15f..6832c4c969a3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -291,31 +291,17 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
 	pages = size >> 16;
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	if (flags & 0x80000000) {
-		while (pages--) {
-			struct nouveau_gpuobj *pt =
-				dev_priv->vm_vram_pt[virt >> 29];
-			unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
+	while (pages--) {
+		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29];
+		unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
+		unsigned offset_h = upper_32_bits(phys) & 0xff;
+		unsigned offset_l = lower_32_bits(phys);
 
-			nv_wo32(dev, pt, pte++, 0x00000000);
-			nv_wo32(dev, pt, pte++, 0x00000000);
+		nv_wo32(dev, pt, pte++, offset_l | 1);
+		nv_wo32(dev, pt, pte++, offset_h | flags);
 
-			virt += (1 << 16);
-		}
-	} else {
-		while (pages--) {
-			struct nouveau_gpuobj *pt =
-				dev_priv->vm_vram_pt[virt >> 29];
-			unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
-			unsigned offset_h = upper_32_bits(phys) & 0xff;
-			unsigned offset_l = lower_32_bits(phys);
-
-			nv_wo32(dev, pt, pte++, offset_l | 1);
-			nv_wo32(dev, pt, pte++, offset_h | flags);
-
-			phys += (1 << 16);
-			virt += (1 << 16);
-		}
+		phys += (1 << 16);
+		virt += (1 << 16);
 	}
 	dev_priv->engine.instmem.finish_access(dev);
 
@@ -339,7 +325,41 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
 void
 nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
 {
-	nv50_mem_vm_bind_linear(dev, virt, size, 0x80000000, 0);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *pgt;
+	unsigned pages, pte, end;
+
+	virt -= dev_priv->vm_vram_base;
+	pages = (size >> 16) << 1;
+
+	dev_priv->engine.instmem.prepare_access(dev, true);
+	while (pages) {
+		pgt = dev_priv->vm_vram_pt[virt >> 29];
+		pte = (virt & 0x1ffe0000ULL) >> 15;
+
+		end = pte + pages;
+		if (end > 16384)
+			end = 16384;
+		pages -= (end - pte);
+		virt  += (end - pte) << 15;
+
+		while (pte < end)
+			nv_wo32(dev, pgt, pte++, 0);
+	}
+	dev_priv->engine.instmem.finish_access(dev);
+
+	nv_wr32(dev, 0x100c80, 0x00050001);
+	if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n");
+		NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
+		return;
+	}
+
+	nv_wr32(dev, 0x100c80, 0x00000001);
+	if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n");
+		NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
+	}
 }
 
 /*

From 531e77139f26e8da32ee694b9ee5e6f4c764f1db Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 11 Feb 2010 11:31:44 +1000
Subject: [PATCH 572/640] drm/nv50: improve vram page table construction

This commit changes nouveau to construct PTEs which look very much like
the ones the binary driver creates.

I presume that filling multiple PTEs identically with length flags and
the physical address of the start of a block of VRAM is a hint to the
memory controller that it need not perform additional page table lookups
for that range of addresses.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 44 ++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 6832c4c969a3..134fedbb7669 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -285,23 +285,45 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
 			uint32_t flags, uint64_t phys)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	unsigned pages;
+	struct nouveau_gpuobj *pgt;
+	unsigned block;
+	int i;
 
-	virt -= dev_priv->vm_vram_base;
-	pages = size >> 16;
+	virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1;
+	size = (size >> 16) << 1;
+	phys |= ((uint64_t)flags << 32) | 1;
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	while (pages--) {
-		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29];
-		unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1;
-		unsigned offset_h = upper_32_bits(phys) & 0xff;
+	while (size) {
+		unsigned offset_h = upper_32_bits(phys);
 		unsigned offset_l = lower_32_bits(phys);
+		unsigned pte, end;
 
-		nv_wo32(dev, pt, pte++, offset_l | 1);
-		nv_wo32(dev, pt, pte++, offset_h | flags);
+		for (i = 7; i >= 0; i--) {
+			block = 1 << (i + 1);
+			if (size >= block && !(virt & (block - 1)))
+				break;
+		}
+		offset_l |= (i << 7);
 
-		phys += (1 << 16);
-		virt += (1 << 16);
+		phys += block << 15;
+		size -= block;
+
+		while (block) {
+			pgt = dev_priv->vm_vram_pt[virt >> 14];
+			pte = virt & 0x3ffe;
+
+			end = pte + block;
+			if (end > 16384)
+				end = 16384;
+			block -= (end - pte);
+			virt  += (end - pte);
+
+			while (pte < end) {
+				nv_wo32(dev, pgt, pte++, offset_l);
+				nv_wo32(dev, pgt, pte++, offset_h);
+			}
+		}
 	}
 	dev_priv->engine.instmem.finish_access(dev);
 

From 76befb8c30cebe2af83fa346bdaf75b430893511 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Sat, 20 Feb 2010 08:06:36 +1000
Subject: [PATCH 573/640] drm/nv50: fix instmem binding on IGPs to point at
 stolen system memory

This also modifies the unused PRAMIN PT entries to be all zeroes, can't
really recall why I used 9/0 initially, just that it didn't work for
some reason.  It was likely masking a bug elsewhere that's since been
fixed.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h  |  1 +
 drivers/gpu/drm/nouveau/nv50_instmem.c | 58 ++++++++++++++++++--------
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 5445cefdd03e..1c15ef37b71c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -583,6 +583,7 @@ struct drm_nouveau_private {
 	uint64_t vm_end;
 	struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
 	int vm_vram_pt_nr;
+	uint64_t vram_sys_base;
 
 	/* the mtrr covering the FB */
 	int fb_mtrr;
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 94400f777e7f..f0dc4e36ef05 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -76,6 +76,11 @@ nv50_instmem_init(struct drm_device *dev)
 	for (i = 0x1700; i <= 0x1710; i += 4)
 		priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i);
 
+	if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac)
+		dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10) << 12;
+	else
+		dev_priv->vram_sys_base = 0;
+
 	/* Reserve the last MiB of VRAM, we should probably try to avoid
 	 * setting up the below tables over the top of the VBIOS image at
 	 * some point.
@@ -172,16 +177,28 @@ nv50_instmem_init(struct drm_device *dev)
 	 * We map the entire fake channel into the start of the PRAMIN BAR
 	 */
 	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000,
-							0, &priv->pramin_pt);
+				     0, &priv->pramin_pt);
 	if (ret)
 		return ret;
 
-	for (i = 0, v = c_offset; i < pt_size; i += 8, v += 0x1000) {
-		if (v < (c_offset + c_size))
-			BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v | 1);
-		else
-			BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000009);
+	v = c_offset | 1;
+	if (dev_priv->vram_sys_base) {
+		v += dev_priv->vram_sys_base;
+		v |= 0x30;
+	}
+
+	i = 0;
+	while (v < dev_priv->vram_sys_base + c_offset + c_size) {
+		BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, v);
 		BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000);
+		v += 0x1000;
+		i += 8;
+	}
+
+	while (i < pt_size) {
+		BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000);
+		BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000);
+		i += 8;
 	}
 
 	BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63);
@@ -416,7 +433,9 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv;
-	uint32_t pte, pte_end, vram;
+	struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj;
+	uint32_t pte, pte_end;
+	uint64_t vram;
 
 	if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound)
 		return -EINVAL;
@@ -424,20 +443,24 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
 	NV_DEBUG(dev, "st=0x%0llx sz=0x%0llx\n",
 		 gpuobj->im_pramin->start, gpuobj->im_pramin->size);
 
-	pte     = (gpuobj->im_pramin->start >> 12) << 3;
-	pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte;
+	pte     = (gpuobj->im_pramin->start >> 12) << 1;
+	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
 	vram    = gpuobj->im_backing_start;
 
 	NV_DEBUG(dev, "pramin=0x%llx, pte=%d, pte_end=%d\n",
 		 gpuobj->im_pramin->start, pte, pte_end);
 	NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
 
+	vram |= 1;
+	if (dev_priv->vram_sys_base) {
+		vram += dev_priv->vram_sys_base;
+		vram |= 0x30;
+	}
+
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	while (pte < pte_end) {
-		nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, vram | 1);
-		nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000);
-
-		pte += 8;
+		nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
+		nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
 		vram += NV50_INSTMEM_PAGE_SIZE;
 	}
 	dev_priv->engine.instmem.finish_access(dev);
@@ -470,14 +493,13 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
 	if (gpuobj->im_bound == 0)
 		return -EINVAL;
 
-	pte     = (gpuobj->im_pramin->start >> 12) << 3;
-	pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte;
+	pte     = (gpuobj->im_pramin->start >> 12) << 1;
+	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	while (pte < pte_end) {
-		nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 0)/4, 0x00000009);
-		nv_wo32(dev, priv->pramin_pt->gpuobj, (pte + 4)/4, 0x00000000);
-		pte += 8;
+		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
+		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
 	}
 	dev_priv->engine.instmem.finish_access(dev);
 

From 6c42966768b0254f465a8f451333795283f53d22 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Sat, 20 Feb 2010 08:10:11 +1000
Subject: [PATCH 574/640] drm/nv50: fix vram ptes on IGPs to point at stolen
 system memory

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 134fedbb7669..2dc09dbd817d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -291,7 +291,13 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
 
 	virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1;
 	size = (size >> 16) << 1;
-	phys |= ((uint64_t)flags << 32) | 1;
+
+	phys |= ((uint64_t)flags << 32);
+	phys |= 1;
+	if (dev_priv->vram_sys_base) {
+		phys += dev_priv->vram_sys_base;
+		phys |= 0x30;
+	}
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	while (size) {

From 5a2d41961dd6815b874b5c0afec0ac96cd90eea4 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 22 Feb 2010 12:44:14 -0800
Subject: [PATCH 575/640] memcg: fix oom killing a child process in an other
 cgroup

Presently the oom-killer is memcg aware and it finds the worst process
from processes under memcg(s) in oom.  Then, it kills victim's child
first.

It may kill a child in another cgroup and may not be any help for
recovery.  And it will break the assumption users have.

This patch fixes it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/oom_kill.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52481b1c1e5..237050478f28 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == p->mm)
 			continue;
+		if (mem && !task_in_mem_cgroup(c, mem))
+			continue;
 		if (!oom_kill_task(c))
 			return 0;
 	}

From 701188374b6f1ef9cf7e4dce4a2e69ef4c0012ac Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 22 Feb 2010 12:44:16 -0800
Subject: [PATCH 576/640] kernel/sys.c: fix missing rcu protection for
 sys_getpriority()

find_task_by_vpid() is not safe without rcu_read_lock().  2.6.33-rc7 got
RCU protection for sys_setpriority() but missed it for sys_getpriority().

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73a6b85..18bde979f346 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 	if (which > PRIO_USER || which < PRIO_PROCESS)
 		return -EINVAL;
 
+	rcu_read_lock();
 	read_lock(&tasklist_lock);
 	switch (which) {
 		case PRIO_PROCESS:
@@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 	}
 out_unlock:
 	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return retval;
 }

From d2e7276b6b5e4bc2148891a056d5862c5314342d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 22 Feb 2010 12:44:19 -0800
Subject: [PATCH 577/640] idr: fix a critical misallocation bug, take#2

This is retry of reverted 859ddf09743a8cc680af33f7259ccd0fd36bfe9d
("idr: fix a critical misallocation bug") which contained two bugs.

* pa[idp->layers] should be cleared even if it's not used by
  sub_alloc() because it's used by mark idr_mark_full().

* The original condition check also assigned pa[l] to p which the new
  code didn't do thus leaving p pointing at the wrong layer.

Both problems have been fixed and the idr code has received good amount
testing using userland testing setup where simple bitmap allocator is
run parallel to verify the result of idr allocation.

The bug this patch fixes is caused by sub_alloc() optimization path
bypassing out-of-room condition check and restarting allocation loop
with starting value higher than maximum allowed value.  For detailed
description, please read commit message of 859ddf09.

Signed-off-by: Tejun Heo <tj@kernel.org>
Based-on-patch-from: Eric Paris <eparis@redhat.com>
Reported-by: Eric Paris <eparis@redhat.com>
Tested-by: Stefan Lippers-Hollmann <s.l-h@gmx.de>
Tested-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..0dc782216d4b 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -156,10 +156,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
 			/* if already at the top layer, we need to grow */
-			if (!(p = pa[l])) {
+			if (id >= 1 << (idp->layers * IDR_BITS)) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}
+			p = pa[l];
+			BUG_ON(!p);
 
 			/* If we need to go up one layer, continue the
 			 * loop; otherwise, restart from the top.

From 115079aad949cba31755eb4e2576edba7fddfdbc Mon Sep 17 00:00:00 2001
From: Jens Rottmann <JRottmann@LiPPERTEmbedded.de>
Date: Mon, 22 Feb 2010 12:44:20 -0800
Subject: [PATCH 578/640] geode-mfgpt: restore previous behavior for selecting
 IRQ

geode-mfgpt: restore previous behavior for selecting IRQ

The MFGPT IRQ used to be, in order of decreasing priority,
 * IRQ supplied by the user as a boot-time parameter,
 * IRQ previously set by the BIOS or another driver,
 * default IRQ given at compile time.

Return to this behavior, which got broken when splitting the
MFGPT/clocksource driver for 2.6.33-rc1.

Signed-off-by: Jens Rottmann <JRottmann@LiPPERTEmbedded.de>
Acked-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: Jordan Crouse <jordan.crouse@amd.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/clocksource/cs5535-clockevt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index 27d20fac19d1..b314a999aabe 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -21,7 +21,7 @@
 
 #define DRV_NAME "cs5535-clockevt"
 
-static int timer_irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ;
+static int timer_irq;
 module_param_named(irq, timer_irq, int, 0644);
 MODULE_PARM_DESC(irq, "Which IRQ to use for the clock source MFGPT ticks.");
 

From 89f3f2199084a160a3a45fa6d9af235696321758 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Mon, 22 Feb 2010 12:44:22 -0800
Subject: [PATCH 579/640] efifb: fix framebuffer handoff

Commit 4410f3910947dcea8672280b3adecd53cec4e85e ("fbdev: add support for
handoff from firmware to hw framebuffers") didn't add fb_destroy
operation to efifb.  Fix it and change aperture_size to match size
passed to request_mem_region.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=15151

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Reported-by: Alex Zhavnerchik <alex.vizor@gmail.com>
Tested-by: Alex Zhavnerchik <alex.vizor@gmail.com>
Acked-by: Peter Jones <pjones@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/efifb.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index eb12182b2059..d25df51bb0d2 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -161,8 +161,17 @@ static int efifb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return 0;
 }
 
+static void efifb_destroy(struct fb_info *info)
+{
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(info->aperture_base, info->aperture_size);
+	framebuffer_release(info);
+}
+
 static struct fb_ops efifb_ops = {
 	.owner		= THIS_MODULE,
+	.fb_destroy	= efifb_destroy,
 	.fb_setcolreg	= efifb_setcolreg,
 	.fb_fillrect	= cfb_fillrect,
 	.fb_copyarea	= cfb_copyarea,
@@ -281,7 +290,7 @@ static int __init efifb_probe(struct platform_device *dev)
 	info->par = NULL;
 
 	info->aperture_base = efifb_fix.smem_start;
-	info->aperture_size = size_total;
+	info->aperture_size = size_remap;
 
 	info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len);
 	if (!info->screen_base) {

From a17e18790a8c47113a73139d54a375dc9ccd8f08 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Mon, 22 Feb 2010 12:44:24 -0800
Subject: [PATCH 580/640] fs/exec.c: fix initial stack reservation

803bf5ec259941936262d10ecc84511b76a20921 ("fs/exec.c: restrict initial
stack space expansion to rlimit") attempts to limit the initial stack to
20*PAGE_SIZE.  Unfortunately, in attempting ensure the stack is not
reduced in size, we ended up not changing the stack at all.

This size reduction check is not necessary as the expand_stack call does
this already.

This caused a regression in UML resulting in most guest processes being
killed.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jouni Malinen <j@w1.fi>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/exec.c b/fs/exec.c
index e95c692ef0e4..cce6bbdbdbb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -637,7 +637,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	 * will align it up.
 	 */
 	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
-	rlim_stack = min(rlim_stack, stack_size);
 #ifdef CONFIG_STACK_GROWSUP
 	if (stack_size + stack_expand > rlim_stack)
 		stack_base = vma->vm_start + rlim_stack;

From 4e4ddd47774313accc86b233d6ca2c6a9037a671 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Sun, 21 Feb 2010 14:54:55 +0000
Subject: [PATCH 581/640] drm/vmwgfx: Fix queries if no dma buffer thrashing is
 occuring.

Intercept query commands and apply relocations to their guest pointers.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 108 ++++++++++++++++++++----
 1 file changed, 92 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index d69caf92ffe7..0897359b3e4e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -182,25 +182,19 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
-static int vmw_cmd_dma(struct vmw_private *dev_priv,
-		       struct vmw_sw_context *sw_context,
-		       SVGA3dCmdHeader *header)
+static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
+				   struct vmw_sw_context *sw_context,
+				   SVGAGuestPtr *ptr,
+				   struct vmw_dma_buffer **vmw_bo_p)
 {
-	uint32_t handle;
 	struct vmw_dma_buffer *vmw_bo = NULL;
 	struct ttm_buffer_object *bo;
-	struct vmw_surface *srf = NULL;
-	struct vmw_dma_cmd {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdSurfaceDMA dma;
-	} *cmd;
+	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
-	int ret;
 	uint32_t cur_validate_node;
 	struct ttm_validate_buffer *val_buf;
+	int ret;
 
-	cmd = container_of(header, struct vmw_dma_cmd, header);
-	handle = cmd->dma.guest.ptr.gmrId;
 	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use GMR region.\n");
@@ -209,14 +203,14 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 	bo = &vmw_bo->base;
 
 	if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) {
-		DRM_ERROR("Max number of DMA commands per submission"
+		DRM_ERROR("Max number relocations per submission"
 			  " exceeded\n");
 		ret = -EINVAL;
 		goto out_no_reloc;
 	}
 
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
-	reloc->location = &cmd->dma.guest.ptr;
+	reloc->location = ptr;
 
 	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
 	if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
@@ -234,7 +228,89 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 		++sw_context->cur_val_buf;
 	}
+	*vmw_bo_p = vmw_bo;
+	return 0;
 
+out_no_reloc:
+	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_p = NULL;
+	return ret;
+}
+
+static int vmw_cmd_end_query(struct vmw_private *dev_priv,
+			     struct vmw_sw_context *sw_context,
+			     SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_query_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdEndQuery q;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_query_cmd, header);
+	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->q.guestResult,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+	return 0;
+}
+
+static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
+			      struct vmw_sw_context *sw_context,
+			      SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_query_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdWaitForQuery q;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_query_cmd, header);
+	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->q.guestResult,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+	return 0;
+}
+
+
+static int vmw_cmd_dma(struct vmw_private *dev_priv,
+		       struct vmw_sw_context *sw_context,
+		       SVGA3dCmdHeader *header)
+{
+	struct vmw_dma_buffer *vmw_bo = NULL;
+	struct ttm_buffer_object *bo;
+	struct vmw_surface *srf = NULL;
+	struct vmw_dma_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdSurfaceDMA dma;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_dma_cmd, header);
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->dma.guest.ptr,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	bo = &vmw_bo->base;
 	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
 					     cmd->dma.host.sid, &srf);
 	if (ret) {
@@ -379,8 +455,8 @@ static vmw_cmd_func vmw_cmd_funcs[SVGA_3D_CMD_MAX] = {
 	VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw),
 	VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check),
 	VMW_CMD_DEF(SVGA_3D_CMD_BEGIN_QUERY, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_cid_check),
-	VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_cid_check),
+	VMW_CMD_DEF(SVGA_3D_CMD_END_QUERY, &vmw_cmd_end_query),
+	VMW_CMD_DEF(SVGA_3D_CMD_WAIT_FOR_QUERY, &vmw_cmd_wait_query),
 	VMW_CMD_DEF(SVGA_3D_CMD_PRESENT_READBACK, &vmw_cmd_ok),
 	VMW_CMD_DEF(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN,
 		    &vmw_cmd_blt_surf_screen_check)

From 79da0644a8e0838522828f106e4049639eea6baf Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 23 Feb 2010 08:40:43 +0100
Subject: [PATCH 582/640] Revert "block: improve queue_should_plug() by looking
 at IO depths"

This reverts commit fb1e75389bd06fd5987e9cda1b4e0305c782f854.

"Benjamin S." <sbenni@gmx.de> reports that the patch in question
causes a big drop in sequential throughput for him, dropping from
200MB/sec down to only 70MB/sec.

Needs to be investigated more fully, for now lets just revert the
offending commit.

Conflicts:

	include/linux/blkdev.h

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 11 ++---------
 include/linux/blkdev.h |  4 +---
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..d1a9a0a64f95 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
  */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-	return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
+	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
 }
 
 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1859,15 +1859,8 @@ void blk_dequeue_request(struct request *rq)
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		/*
-		 * Mark this device as supporting hardware queuing, if
-		 * we have more IOs in flight than 4.
-		 */
-		if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
-			set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
-	}
 }
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c8018977efa..1896e868854f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -461,8 +461,7 @@ struct request_queue
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
-#define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
-#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
+#define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -586,7 +585,6 @@ enum {
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
-#define blk_queue_queuing(q)	test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)

From b5abb028e214cca68f4231d4f3bc0847ddbc986e Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 19 Feb 2010 17:54:53 +0000
Subject: [PATCH 583/640] e1000: Fix DMA mapping error handling on RX

Check for error return from pci_map_single/pci_map_page and clean up.

With this and the previous patch the driver was able to handle a significant
percentage of errors (I set the fault injection rate to 10% and could still
download large files at a reasonable speed).

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/e1000/e1000_main.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index d29bb532eccf..765543663a4f 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -4006,11 +4006,21 @@ check_page:
 			}
 		}
 
-		if (!buffer_info->dma)
+		if (!buffer_info->dma) {
 			buffer_info->dma = pci_map_page(pdev,
 			                                buffer_info->page, 0,
 			                                buffer_info->length,
 			                                PCI_DMA_FROMDEVICE);
+			if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
+				put_page(buffer_info->page);
+				dev_kfree_skb(skb);
+				buffer_info->page = NULL;
+				buffer_info->skb = NULL;
+				buffer_info->dma = 0;
+				adapter->alloc_rx_buff_failed++;
+				break; /* while !buffer_info->skb */
+			}
+		}
 
 		rx_desc = E1000_RX_DESC(*rx_ring, i);
 		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
@@ -4101,6 +4111,13 @@ map_skb:
 						  skb->data,
 						  buffer_info->length,
 						  PCI_DMA_FROMDEVICE);
+		if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
+			dev_kfree_skb(skb);
+			buffer_info->skb = NULL;
+			buffer_info->dma = 0;
+			adapter->alloc_rx_buff_failed++;
+			break; /* while !buffer_info->skb */
+		}
 
 		/*
 		 * XXX if it was allocated cleanly it will never map to a

From f5ca8502f70ccc77008b7bee671f5301995240a4 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Mon, 22 Feb 2010 22:34:54 +0000
Subject: [PATCH 584/640] MAINTAINERS: update mv643xx_eth maintenance status

I am no longer with Marvell.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 412eff60c33d..318d2e417168 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3490,9 +3490,9 @@ S:	Maintained
 F:	drivers/net/wireless/libertas/
 
 MARVELL MV643XX ETHERNET DRIVER
-M:	Lennert Buytenhek <buytenh@marvell.com>
+M:	Lennert Buytenhek <buytenh@wantstofly.org>
 L:	netdev@vger.kernel.org
-S:	Supported
+S:	Maintained
 F:	drivers/net/mv643xx_eth.*
 F:	include/linux/mv643xx.h
 

From e79dc48431e7731f5bb6bab8f6b499fe03802ca0 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 22 Feb 2010 12:27:21 +0000
Subject: [PATCH 585/640] IPv6: better document max_addresses parameter

Andrew Morton wrote:
>> >From ip-sysctl.txt file in kernel documentation I can see following description
>> for max_addresses:
>> max_addresses - INTEGER
>>         Number of maximum addresses per interface.  0 disables limitation.
>>         It is recommended not set too large value (or 0) because it would
>>         be too easy way to crash kernel to allow to create too much of
>>         autoconfigured addresses.
           ^^^^^^^^^^^^^^

>> If this parameter applies only for auto-configured IP addressed, please state
>> it more clearly in docs or rename the parameter to show that it refers to
>> auto-configuration.

It did mention autoconfigured in the text, but the below makes it more obvious.

More clearly document IPv6 max_addresses parameter.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 006b39dec87d..e87f3cdc8a6a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1074,10 +1074,10 @@ regen_max_retry - INTEGER
 	Default: 5
 
 max_addresses - INTEGER
-	Number of maximum addresses per interface.  0 disables limitation.
-	It is recommended not set too large value (or 0) because it would
-	be too easy way to crash kernel to allow to create too much of
-	autoconfigured addresses.
+	Maximum number of autoconfigured addresses per interface.  Setting
+	to zero disables the limitation.  It is not recommended to set this
+	value too large (or to zero) because it would be an easy way to
+	crash the kernel by allowing too many addresses to be created.
 	Default: 16
 
 disable_ipv6 - BOOLEAN

From cac43a1b7b091b17113502e4128dcb0ff7e3503d Mon Sep 17 00:00:00 2001
From: Torgny Johansson <torgny.johansson@gmail.com>
Date: Fri, 19 Feb 2010 01:59:15 +0000
Subject: [PATCH 586/640] cdc_ether: new PID for Ericsson C3607w to the
 whitelist (resubmit)

This patch adds a new vid/pid to the cdc_ether whitelist.

Device added:
- Ericsson Mobile Broadband variant C3607w

Signed-off-by: Torgny Johansson <torgny.johansson@gmail.com>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 4f27f022fbf7..5f3b9eaeb04f 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -583,6 +583,11 @@ static const struct usb_device_id	products [] = {
 	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x1049, USB_CLASS_COMM,
 			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
 	.driver_info = (unsigned long) &mbm_info,
+}, {
+	/* Ericsson C3607w ver 2 */
+	USB_DEVICE_AND_INTERFACE_INFO(0x0bdb, 0x190b, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_MDLM, USB_CDC_PROTO_NONE),
+	.driver_info = (unsigned long) &mbm_info,
 }, {
 	/* Toshiba F3507g */
 	USB_DEVICE_AND_INTERFACE_INFO(0x0930, 0x130b, USB_CLASS_COMM,

From 662a96bd6f020782dfbdc0d0bd177c7dbb556687 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Fri, 19 Feb 2010 05:13:58 +0000
Subject: [PATCH 587/640] tc35815: Remove a wrong netif_wake_queue() call which
 triggers BUG_ON

The netif_wake_queue() is called correctly (i.e. only on !txfull
condition) from txdone routine.  So Unconditional call to the
netif_wake_queue() here is wrong.  This might cause calling of
start_xmit routine on txfull state and trigger BUG_ON.

This bug does not happen when NAPI disabled.  After txdone there
must be at least one free tx slot.  But with NAPI, this is not
true anymore and the BUG_ON can hits on heavy load.

In this driver NAPI was enabled on 2.6.33-rc1 so this is
regression from 2.6.32 kernel.

Reported-by: Ralf Roesch <ralf.roesch@rw-gmbh.de>
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tc35815.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index 75a669d48e5e..d71c1976072e 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1437,7 +1437,6 @@ static int tc35815_do_interrupt(struct net_device *dev, u32 status, int limit)
 		/* Transmit complete. */
 		lp->lstats.tx_ints++;
 		tc35815_txdone(dev);
-		netif_wake_queue(dev);
 		if (ret < 0)
 			ret = 0;
 	}

From f526d68b6ce9ba7a2bd94e663e240a022524c58a Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:52 -0600
Subject: [PATCH 588/640] perf/scripts: Fix supported language listing option

'perf trace -s list' prints a list of the supported scripting
languages.  One problem with it is that it falls through and prints
the trace as well.  The use of 'list' for this also makes it easy to
confuse with 'perf trace -l', used for listing available scripts.  So
change 'perf trace -s list' to 'perf trace -s lang' and fixes the
fall-through problem.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-2-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/Documentation/perf-trace.txt | 4 +++-
 tools/perf/builtin-trace.c              | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 60e5900da483..c00a76fcb8d6 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -45,9 +45,11 @@ OPTIONS
 --list=::
         Display a list of available trace scripts.
 
--s::
+-s ['lang']::
 --script=::
         Process trace data with the given script ([lang]:script[.ext]).
+	If the string 'lang' is specified in place of a script name, a
+        list of supported languages will be displayed instead.
 
 -g::
 --gen-script=::
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0b65779e3c10..d5d20c34e221 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -219,9 +219,9 @@ static int parse_scriptname(const struct option *opt __used,
 	const char *script, *ext;
 	int len;
 
-	if (strcmp(str, "list") == 0) {
+	if (strcmp(str, "lang") == 0) {
 		list_available_languages();
-		return 0;
+		exit(0);
 	}
 
 	script = strchr(str, ':');

From e26207a3819684e9b4450a2d30bdd065fa92d9c7 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:53 -0600
Subject: [PATCH 589/640] perf/scripts: Fix bug in Util.pm

Fix bogus calculation.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-3-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index 052f132ced24..f869c48dc9b0 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -44,7 +44,7 @@ sub nsecs_secs {
 sub nsecs_nsecs {
     my ($nsecs) = @_;
 
-    return $nsecs - nsecs_secs($nsecs);
+    return $nsecs % $NSECS_PER_SEC;
 }
 
 sub nsecs_str {

From 7397d80ddde8eef3b1dce6c29e0c53bd322ef824 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:54 -0600
Subject: [PATCH 590/640] perf/scripts: Move common code out of Perl-specific
 files

This stuff is needed by all scripting engines; move it from the Perl
engine source to a more common place.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-4-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 .../scripts/perl/Perf-Trace-Util/Context.c    |  5 ++--
 .../scripts/perl/Perf-Trace-Util/Context.xs   |  3 ++-
 tools/perf/util/trace-event-parse.c           | 15 +++++++++++
 tools/perf/util/trace-event-perl.c            | 27 -------------------
 tools/perf/util/trace-event-perl.h            |  8 ------
 tools/perf/util/trace-event.h                 |  9 ++++++-
 6 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
index af78d9a52a7d..01a64ad693f2 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -31,13 +31,14 @@
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
-#include "../../../util/trace-event-perl.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
 
 #ifndef PERL_UNUSED_VAR
 #  define PERL_UNUSED_VAR(var) if (0) var = var
 #endif
 
-#line 41 "Context.c"
+#line 42 "Context.c"
 
 XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
 XS(XS_Perf__Trace__Context_common_pc)
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
index fb78006c165e..549cf0467d30 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -22,7 +22,8 @@
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
-#include "../../../util/trace-event-perl.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
 
 MODULE = Perf::Trace::Context		PACKAGE = Perf::Trace::Context
 PROTOTYPES: ENABLE
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index c4b3cb8a02b1..9b3c20f42f98 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -3286,3 +3286,18 @@ void parse_set_info(int nr_cpus, int long_sz)
 	cpus = nr_cpus;
 	long_size = long_sz;
 }
+
+int common_pc(struct scripting_context *context)
+{
+	return parse_common_pc(context->event_data);
+}
+
+int common_flags(struct scripting_context *context)
+{
+	return parse_common_flags(context->event_data);
+}
+
+int common_lock_depth(struct scripting_context *context)
+{
+	return parse_common_lock_depth(context->event_data);
+}
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
index 6d6d76b8a21e..5b49df067df0 100644
--- a/tools/perf/util/trace-event-perl.c
+++ b/tools/perf/util/trace-event-perl.c
@@ -239,33 +239,6 @@ static inline struct event *find_cache_event(int type)
 	return event;
 }
 
-int common_pc(struct scripting_context *context)
-{
-	int pc;
-
-	pc = parse_common_pc(context->event_data);
-
-	return pc;
-}
-
-int common_flags(struct scripting_context *context)
-{
-	int flags;
-
-	flags = parse_common_flags(context->event_data);
-
-	return flags;
-}
-
-int common_lock_depth(struct scripting_context *context)
-{
-	int lock_depth;
-
-	lock_depth = parse_common_lock_depth(context->event_data);
-
-	return lock_depth;
-}
-
 static void perl_process_event(int cpu, void *data,
 			       int size __unused,
 			       unsigned long long nsecs, char *comm)
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
index e88fb26137bb..01efcc9564fb 100644
--- a/tools/perf/util/trace-event-perl.h
+++ b/tools/perf/util/trace-event-perl.h
@@ -44,12 +44,4 @@ void boot_DynaLoader(pTHX_ CV *cv);
 typedef PerlInterpreter * INTERP;
 #endif
 
-struct scripting_context {
-	void *event_data;
-};
-
-int common_pc(struct scripting_context *context);
-int common_flags(struct scripting_context *context);
-int common_lock_depth(struct scripting_context *context);
-
 #endif /* __PERF_TRACE_EVENT_PERL_H */
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 6ad405620c9b..aaf2da2d21e5 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -279,7 +279,14 @@ struct scripting_ops {
 
 int script_spec_register(const char *spec, struct scripting_ops *ops);
 
-extern struct scripting_ops perl_scripting_ops;
 void setup_perl_scripting(void);
 
+struct scripting_context {
+	void *event_data;
+};
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
 #endif /* __PERF_TRACE_EVENTS_H */

From 82d156cd5e817055c63ec50247a425c195f4cb14 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:55 -0600
Subject: [PATCH 591/640] perf/scripts: Move Perl scripting files to
 scripting-engines dir

Create a scripting-engines directory to contain scripting engine
implementation code, in anticipation of the addition of new scripting
support.  Also removes trace-event-perl.h.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-5-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/Makefile                           |   9 +-
 .../trace-event-perl.c                        |  88 ++-------------
 tools/perf/util/trace-event-perl.h            |  47 --------
 tools/perf/util/trace-event-scripting.c       | 106 ++++++++++++++++++
 4 files changed, 122 insertions(+), 128 deletions(-)
 rename tools/perf/util/{ => scripting-engines}/trace-event-perl.c (87%)
 delete mode 100644 tools/perf/util/trace-event-perl.h
 create mode 100644 tools/perf/util/trace-event-scripting.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3a5fb36ccc97..0a3c0c8b3fc0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -385,7 +385,6 @@ LIB_H += util/sort.h
 LIB_H += util/hist.h
 LIB_H += util/thread.h
 LIB_H += util/trace-event.h
-LIB_H += util/trace-event-perl.h
 LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 
@@ -428,7 +427,7 @@ LIB_OBJS += util/thread.o
 LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
 LIB_OBJS += util/trace-event-info.o
-LIB_OBJS += util/trace-event-perl.o
+LIB_OBJS += util/trace-event-scripting.o
 LIB_OBJS += util/svghelper.o
 LIB_OBJS += util/sort.o
 LIB_OBJS += util/hist.o
@@ -519,6 +518,7 @@ ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; e
 	BASIC_CFLAGS += -DNO_LIBPERL
 else
 	ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
+	LIB_OBJS += util/scripting-engines/trace-event-perl.o
 	LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
 endif
 
@@ -893,8 +893,8 @@ util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
 util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
-util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
 
 scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
@@ -1012,6 +1012,7 @@ install: all
 	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
 	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+
 ifdef BUILT_INS
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
similarity index 87%
rename from tools/perf/util/trace-event-perl.c
rename to tools/perf/util/scripting-engines/trace-event-perl.c
index 5b49df067df0..5376378e0cfc 100644
--- a/tools/perf/util/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -25,10 +25,16 @@
 #include <ctype.h>
 #include <errno.h>
 
-#include "../perf.h"
-#include "util.h"
-#include "trace-event.h"
-#include "trace-event-perl.h"
+#include "../../perf.h"
+#include "../util.h"
+#include "../trace-event.h"
+
+#include <EXTERN.h>
+#include <perl.h>
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+typedef PerlInterpreter * INTERP;
 
 void xs_init(pTHX);
 
@@ -49,7 +55,7 @@ INTERP my_perl;
 
 struct event *events[FTRACE_MAX_EVENT];
 
-static struct scripting_context *scripting_context;
+extern struct scripting_context *scripting_context;
 
 static char *cur_field_name;
 static int zero_flag_atom;
@@ -560,75 +566,3 @@ struct scripting_ops perl_scripting_ops = {
 	.process_event = perl_process_event,
 	.generate_script = perl_generate_script,
 };
-
-static void print_unsupported_msg(void)
-{
-	fprintf(stderr, "Perl scripting not supported."
-		"  Install libperl and rebuild perf to enable it.\n"
-		"For example:\n  # apt-get install libperl-dev (ubuntu)"
-		"\n  # yum install perl-ExtUtils-Embed (Fedora)"
-		"\n  etc.\n");
-}
-
-static int perl_start_script_unsupported(const char *script __unused,
-					 int argc __unused,
-					 const char **argv __unused)
-{
-	print_unsupported_msg();
-
-	return -1;
-}
-
-static int perl_stop_script_unsupported(void)
-{
-	return 0;
-}
-
-static void perl_process_event_unsupported(int cpu __unused,
-					   void *data __unused,
-					   int size __unused,
-					   unsigned long long nsecs __unused,
-					   char *comm __unused)
-{
-}
-
-static int perl_generate_script_unsupported(const char *outfile __unused)
-{
-	print_unsupported_msg();
-
-	return -1;
-}
-
-struct scripting_ops perl_scripting_unsupported_ops = {
-	.name = "Perl",
-	.start_script = perl_start_script_unsupported,
-	.stop_script = perl_stop_script_unsupported,
-	.process_event = perl_process_event_unsupported,
-	.generate_script = perl_generate_script_unsupported,
-};
-
-static void register_perl_scripting(struct scripting_ops *scripting_ops)
-{
-	int err;
-	err = script_spec_register("Perl", scripting_ops);
-	if (err)
-		die("error registering Perl script extension");
-
-	err = script_spec_register("pl", scripting_ops);
-	if (err)
-		die("error registering pl script extension");
-
-	scripting_context = malloc(sizeof(struct scripting_context));
-}
-
-#ifdef NO_LIBPERL
-void setup_perl_scripting(void)
-{
-	register_perl_scripting(&perl_scripting_unsupported_ops);
-}
-#else
-void setup_perl_scripting(void)
-{
-	register_perl_scripting(&perl_scripting_ops);
-}
-#endif
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
deleted file mode 100644
index 01efcc9564fb..000000000000
--- a/tools/perf/util/trace-event-perl.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef __PERF_TRACE_EVENT_PERL_H
-#define __PERF_TRACE_EVENT_PERL_H
-#ifdef NO_LIBPERL
-typedef int INTERP;
-#define dSP
-#define ENTER
-#define SAVETMPS
-#define PUTBACK
-#define SPAGAIN
-#define FREETMPS
-#define LEAVE
-#define SP
-#define ERRSV
-#define G_SCALAR		(0)
-#define G_DISCARD		(0)
-#define G_NOARGS		(0)
-#define PUSHMARK(a)
-#define SvTRUE(a)		(0)
-#define XPUSHs(s)
-#define sv_2mortal(a)
-#define newSVpv(a,b)
-#define newSVuv(a)
-#define newSViv(a)
-#define get_cv(a,b)		(0)
-#define call_pv(a,b)		(0)
-#define perl_alloc()		(0)
-#define perl_construct(a)	(0)
-#define perl_parse(a,b,c,d,e)	(0)
-#define perl_run(a)		(0)
-#define perl_destruct(a)	(0)
-#define perl_free(a)		(0)
-#define pTHX			void
-#define CV			void
-#define dXSUB_SYS
-#define pTHX_
-static inline void newXS(const char *a, void *b, const char *c) {}
-static void boot_Perf__Trace__Context(pTHX_ CV *cv) {}
-static void boot_DynaLoader(pTHX_ CV *cv) {}
-#else
-#include <EXTERN.h>
-#include <perl.h>
-void boot_Perf__Trace__Context(pTHX_ CV *cv);
-void boot_DynaLoader(pTHX_ CV *cv);
-typedef PerlInterpreter * INTERP;
-#endif
-
-#endif /* __PERF_TRACE_EVENT_PERL_H */
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
new file mode 100644
index 000000000000..9e371965c034
--- /dev/null
+++ b/tools/perf/util/trace-event-scripting.c
@@ -0,0 +1,106 @@
+/*
+ * trace-event-scripting.  Scripting engine common and initialization code.
+ *
+ * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "trace-event.h"
+
+struct scripting_context *scripting_context;
+
+static int stop_script_unsupported(void)
+{
+	return 0;
+}
+
+static void process_event_unsupported(int cpu __unused,
+				      void *data __unused,
+				      int size __unused,
+				      unsigned long long nsecs __unused,
+				      char *comm __unused)
+{
+}
+
+static void print_perl_unsupported_msg(void)
+{
+	fprintf(stderr, "Perl scripting not supported."
+		"  Install libperl and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install libperl-dev (ubuntu)"
+		"\n  # yum install 'perl(ExtUtils::Embed)' (Fedora)"
+		"\n  etc.\n");
+}
+
+static int perl_start_script_unsupported(const char *script __unused,
+					 int argc __unused,
+					 const char **argv __unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+static int perl_generate_script_unsupported(const char *outfile __unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+	.name = "Perl",
+	.start_script = perl_start_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
+{
+	int err;
+	err = script_spec_register("Perl", scripting_ops);
+	if (err)
+		die("error registering Perl script extension");
+
+	err = script_spec_register("pl", scripting_ops);
+	if (err)
+		die("error registering pl script extension");
+
+	scripting_context = malloc(sizeof(struct scripting_context));
+}
+
+#ifdef NO_LIBPERL
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+struct scripting_ops perl_scripting_ops;
+
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_ops);
+}
+#endif

From 266fe2f217d1dc9f8041e395c0ab4569a5bad91a Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:56 -0600
Subject: [PATCH 592/640] perf/scripts: Remove check-perf-trace from listed
 scripts

The check-perf-trace script only checks Perl functionality, and
doesn't really need to be listed as as user script anyway.

This only removes the '-report' shell script, so although it doesn't
appear in the listing, the '-record' shell script and the check perf
trace perl script itself is still available and can still be run
manually as such:

$ libexec/perf-core/scripts/perl/bin/check-perf-trace-record
$ perf trace -s libexec/perf-core/scripts/perl/check-perf-trace.pl

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-6-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/scripts/perl/bin/check-perf-trace-record | 5 -----
 tools/perf/scripts/perl/bin/check-perf-trace-report | 6 ------
 2 files changed, 11 deletions(-)
 delete mode 100644 tools/perf/scripts/perl/bin/check-perf-trace-report

diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
index c7ec5de2f535..3c1574498942 100644
--- a/tools/perf/scripts/perl/bin/check-perf-trace-record
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -1,7 +1,2 @@
 #!/bin/bash
 perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry
-
-
-
-
-
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report
deleted file mode 100644
index 7fc4a033dd49..000000000000
--- a/tools/perf/scripts/perl/bin/check-perf-trace-report
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-# description: useless but exhaustive test script
-perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl
-
-
-

From f7624c97b8e5bca49be7854309550bff8ce98c47 Mon Sep 17 00:00:00 2001
From: Hedi Berriche <hedi@sgi.com>
Date: Tue, 23 Feb 2010 23:58:49 +0000
Subject: [PATCH 593/640] [IA64] Fix broken sn2 build

Revert the change made to arch/ia64/sn/kernel/setup.c by commit
204fba4aa303ea4a7bb726a539bf4a5b9e3203d0 as it breaks the build.

Fixing the build the b94b08081fcecf83fa690d6c5664f6316fe72208 way
breaks xpc because genksyms then fails to generate an CRC for
per_cpu____sn_cnodeid_to_nasid because of limitations in the
generic genksyms code.

Signed-off-by: Hedi Berriche <hedi@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index ece1bf994499..e456f062f241 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
 EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
 
-DEFINE_PER_CPU(short [MAX_COMPACT_NODES], __sn_cnodeid_to_nasid);
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
 
 DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);

From c4d49794ff2838038fd9756eae39c39a5a685833 Mon Sep 17 00:00:00 2001
From: Ajit Khaparde <ajitkhaparde@gmail.com>
Date: Tue, 16 Feb 2010 20:25:43 +0000
Subject: [PATCH 594/640] net: bug fix for vlan + gro issue

Traffic (tcp) doesnot start on a vlan interface when gro is enabled.
Even the tcp handshake was not taking place.
This is because, the eth_type_trans call before the netif_receive_skb
in napi_gro_finish() resets the skb->dev to napi->dev from the previously
set vlan netdev interface. This causes the ip_route_input to drop the
incoming packet considering it as a packet coming from a martian source.

I could repro this on 2.6.32.7 (stable) and 2.6.33-rc7.
With this fix, the traffic starts and the test runs fine on both vlan
and non-vlan interfaces.

CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index be9924f60ec3..ec874218b206 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2761,7 +2761,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 	switch (ret) {
 	case GRO_NORMAL:
 	case GRO_HELD:
-		skb->protocol = eth_type_trans(skb, napi->dev);
+		skb->protocol = eth_type_trans(skb, skb->dev);
 
 		if (ret == GRO_HELD)
 			skb_gro_pull(skb, -ETH_HLEN);

From 0d670b24729be268eba98b3920b8571f60798d8d Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Mon, 15 Feb 2010 10:50:42 +0100
Subject: [PATCH 595/640] microblaze: Fix cache loop function for cache range

I create wrong asm code but none test shows that this part of code is wrong.
I am not convinces that were good idea to create asm optimized macros
for caches. The reason is that there is not optimization with previous code
that's why make sense to add old code and do some benchmarking which
functions are faster.

Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/cpu/cache.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
index d9d63831cc2f..2a56bccce4e0 100644
--- a/arch/microblaze/kernel/cpu/cache.c
+++ b/arch/microblaze/kernel/cpu/cache.c
@@ -172,16 +172,15 @@ do {									\
 /* It is used only first parameter for OP - for wic, wdc */
 #define CACHE_RANGE_LOOP_1(start, end, line_length, op)			\
 do {									\
-	int step = -line_length;					\
-	int count = end - start;					\
-	BUG_ON(count <= 0);						\
+	int volatile temp;						\
+	BUG_ON(end - start <= 0);					\
 									\
-	__asm__ __volatile__ (" 1:	addk	%0, %0, %1;		\
-					" #op " %0, r0;			\
-					bgtid   %1, 1b;			\
-					addk    %1, %1, %2;		\
-					" : : "r" (start), "r" (count),	\
-					"r" (step) : "memory");		\
+	__asm__ __volatile__ (" 1:	" #op " %1, r0;			\
+					cmpu	%0, %1, %2;		\
+					bgtid	%0, 1b;			\
+					addk	%1, %1, %3;		\
+				" : : "r" (temp), "r" (start), "r" (end),\
+					"r" (line_length) : "memory");	\
 } while (0);
 
 static void __flush_icache_range_msr_irq(unsigned long start, unsigned long end)
@@ -313,16 +312,6 @@ static void __invalidate_dcache_all_wb(void)
 	pr_debug("%s\n", __func__);
 	CACHE_ALL_LOOP2(cpuinfo.dcache_size, cpuinfo.dcache_line_length,
 					wdc.clear)
-
-#if 0
-	unsigned int i;
-
-	pr_debug("%s\n", __func__);
-
-	/* Just loop through cache size and invalidate it */
-	for (i = 0; i < cpuinfo.dcache_size; i += cpuinfo.dcache_line_length)
-			__invalidate_dcache(0, i);
-#endif
 }
 
 static void __invalidate_dcache_range_wb(unsigned long start,

From 83b4d17d8841a9a7b8ed02ac99ca92afada154e1 Mon Sep 17 00:00:00 2001
From: "Steven J. Magnani" <steve@digidescorp.com>
Date: Mon, 22 Feb 2010 09:25:42 -0600
Subject: [PATCH 596/640] microblaze: Fix out_le32() macro

Trailing semicolon causes compilation involving out_le32() to fail.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/include/asm/io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h
index fc9997b73c09..267c7c779e53 100644
--- a/arch/microblaze/include/asm/io.h
+++ b/arch/microblaze/include/asm/io.h
@@ -217,7 +217,7 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
  * Little endian
  */
 
-#define out_le32(a, v) __raw_writel(__cpu_to_le32(v), (a));
+#define out_le32(a, v) __raw_writel(__cpu_to_le32(v), (a))
 #define out_le16(a, v) __raw_writew(__cpu_to_le16(v), (a))
 
 #define in_le32(a) __le32_to_cpu(__raw_readl(a))

From 5fd4514bb351b5ecb0da3692fff70741e5ed200c Mon Sep 17 00:00:00 2001
From: Carlos O'Donell <carlos@codesourcery.com>
Date: Mon, 22 Feb 2010 23:25:59 +0000
Subject: [PATCH 597/640] parisc: Set PCI CLS early in boot.

Set the PCI CLS early in the boot process to prevent
device failures. In pcibios_set_master use the new
pci_cache_line_size instead of a hard-coded value.

Signed-off-by: Carlos O'Donell <carlos@codesourcery.com>
Reviewed-by: Grant Grundler <grundler@google.com>
Signed-off-by: Kyle McMartin <kyle@redhat.com>
---
 arch/parisc/kernel/pci.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index f7064abc3bb6..9e74bfe071dc 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -18,7 +18,6 @@
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/cache.h>		/* for L1_CACHE_BYTES */
 #include <asm/superio.h>
 
 #define DEBUG_RESOURCES 0
@@ -123,6 +122,10 @@ static int __init pcibios_init(void)
 	} else {
 		printk(KERN_WARNING "pci_bios != NULL but init() is!\n");
 	}
+
+	/* Set the CLS for PCI as early as possible. */
+	pci_cache_line_size = pci_dfl_cache_line_size;
+
 	return 0;
 }
 
@@ -171,7 +174,7 @@ void pcibios_set_master(struct pci_dev *dev)
 	** upper byte is PCI_LATENCY_TIMER.
 	*/
 	pci_write_config_word(dev, PCI_CACHE_LINE_SIZE,
-				(0x80 << 8) | (L1_CACHE_BYTES / sizeof(u32)));
+			      (0x80 << 8) | pci_cache_line_size);
 }
 
 
From 60b341b778cc2929df16c0a504c91621b3c6a4ad Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 24 Feb 2010 10:52:17 -0800
Subject: [PATCH 598/640] Linux 2.6.33

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 12b1aa1103ee..1b24895212d8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 33
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*

From 7e4b21b84c43bb8a80b916e40718ca4ed1fc52e6 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:57 -0600
Subject: [PATCH 599/640] perf/scripts: Add Python scripting engine

Add base support for Python scripting to perf trace.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-6-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/Makefile                           |  21 +
 tools/perf/builtin-trace.c                    |   1 +
 .../scripts/python/Perf-Trace-Util/Context.c  |  88 +++
 .../Perf-Trace-Util/lib/Perf/Trace/Core.py    |  91 +++
 .../Perf-Trace-Util/lib/Perf/Trace/Util.py    |  25 +
 .../scripting-engines/trace-event-python.c    | 576 ++++++++++++++++++
 tools/perf/util/trace-event-scripting.c       |  61 ++
 tools/perf/util/trace-event.h                 |   1 +
 8 files changed, 864 insertions(+)
 create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/Context.c
 create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
 create mode 100644 tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
 create mode 100644 tools/perf/util/scripting-engines/trace-event-python.c

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0a3c0c8b3fc0..14273164db04 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -522,6 +522,19 @@ else
 	LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
 endif
 
+ifndef NO_LIBPYTHON
+PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
+PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
+endif
+
+ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o /dev/null $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
+	BASIC_CFLAGS += -DNO_LIBPYTHON
+else
+	ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
+	LIB_OBJS += util/scripting-engines/trace-event-python.o
+	LIB_OBJS += scripts/python/Perf-Trace-Util/Context.o
+endif
+
 ifdef NO_DEMANGLE
 	BASIC_CFLAGS += -DNO_DEMANGLE
 else
@@ -899,6 +912,12 @@ util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-pe
 scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
 
+util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-python.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o scripts/python/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
 perf-%$X: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
@@ -1012,6 +1031,8 @@ install: all
 	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
 	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
 
 ifdef BUILT_INS
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d5d20c34e221..5db687fc13de 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -44,6 +44,7 @@ static void setup_scripting(void)
 	perf_set_argv_exec_path(perf_exec_path());
 
 	setup_perl_scripting();
+	setup_python_scripting();
 
 	scripting_ops = &default_scripting_ops;
 }
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
new file mode 100644
index 000000000000..957085dd5d8d
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -0,0 +1,88 @@
+/*
+ * Context.c.  Python interfaces for perf trace.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <Python.h>
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+PyMODINIT_FUNC initperf_trace_context(void);
+
+static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args)
+{
+	static struct scripting_context *scripting_context;
+	PyObject *context;
+	int retval;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = PyCObject_AsVoidPtr(context);
+	retval = common_pc(scripting_context);
+
+	return Py_BuildValue("i", retval);
+}
+
+static PyObject *perf_trace_context_common_flags(PyObject *self,
+						 PyObject *args)
+{
+	static struct scripting_context *scripting_context;
+	PyObject *context;
+	int retval;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = PyCObject_AsVoidPtr(context);
+	retval = common_flags(scripting_context);
+
+	return Py_BuildValue("i", retval);
+}
+
+static PyObject *perf_trace_context_common_lock_depth(PyObject *self,
+						      PyObject *args)
+{
+	static struct scripting_context *scripting_context;
+	PyObject *context;
+	int retval;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = PyCObject_AsVoidPtr(context);
+	retval = common_lock_depth(scripting_context);
+
+	return Py_BuildValue("i", retval);
+}
+
+static PyMethodDef ContextMethods[] = {
+	{ "common_pc", perf_trace_context_common_pc, METH_VARARGS,
+	  "Get the common preempt count event field value."},
+	{ "common_flags", perf_trace_context_common_flags, METH_VARARGS,
+	  "Get the common flags event field value."},
+	{ "common_lock_depth", perf_trace_context_common_lock_depth,
+	  METH_VARARGS,	"Get the common lock depth event field value."},
+	{ NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC initperf_trace_context(void)
+{
+	(void) Py_InitModule("perf_trace_context", ContextMethods);
+}
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
new file mode 100644
index 000000000000..1dc464ee2ca8
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -0,0 +1,91 @@
+# Core.py - Python extension for perf trace, core functions
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+from collections import defaultdict
+
+def autodict():
+    return defaultdict(autodict)
+
+flag_fields = autodict()
+symbolic_fields = autodict()
+
+def define_flag_field(event_name, field_name, delim):
+    flag_fields[event_name][field_name]['delim'] = delim
+
+def define_flag_value(event_name, field_name, value, field_str):
+    flag_fields[event_name][field_name]['values'][value] = field_str
+
+def define_symbolic_field(event_name, field_name):
+    # nothing to do, really
+    pass
+
+def define_symbolic_value(event_name, field_name, value, field_str):
+    symbolic_fields[event_name][field_name]['values'][value] = field_str
+
+def flag_str(event_name, field_name, value):
+    string = ""
+
+    if flag_fields[event_name][field_name]:
+	print_delim = 0
+        keys = flag_fields[event_name][field_name]['values'].keys()
+        keys.sort()
+        for idx in keys:
+            if not value and not idx:
+                string += flag_fields[event_name][field_name]['values'][idx]
+                break
+            if idx and (value & idx) == idx:
+                if print_delim and flag_fields[event_name][field_name]['delim']:
+                    string += " " + flag_fields[event_name][field_name]['delim'] + " "
+                string += flag_fields[event_name][field_name]['values'][idx]
+                print_delim = 1
+                value &= ~idx
+
+    return string
+
+def symbol_str(event_name, field_name, value):
+    string = ""
+
+    if symbolic_fields[event_name][field_name]:
+        keys = symbolic_fields[event_name][field_name]['values'].keys()
+        keys.sort()
+        for idx in keys:
+            if not value and not idx:
+		string = symbolic_fields[event_name][field_name]['values'][idx]
+                break
+	    if (value == idx):
+		string = symbolic_fields[event_name][field_name]['values'][idx]
+                break
+
+    return string
+
+trace_flags = { 0x00: "NONE", \
+                    0x01: "IRQS_OFF", \
+                    0x02: "IRQS_NOSUPPORT", \
+                    0x04: "NEED_RESCHED", \
+                    0x08: "HARDIRQ", \
+                    0x10: "SOFTIRQ" }
+
+def trace_flag_str(value):
+    string = ""
+    print_delim = 0
+
+    keys = trace_flags.keys()
+
+    for idx in keys:
+	if not value and not idx:
+	    string += "NONE"
+	    break
+
+	if idx and (value & idx) == idx:
+	    if print_delim:
+		string += " | ";
+	    string += trace_flags[idx]
+	    print_delim = 1
+	    value &= ~idx
+
+    return string
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
new file mode 100644
index 000000000000..83e91435ed09
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -0,0 +1,25 @@
+# Util.py - Python extension for perf trace, miscellaneous utility code
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+NSECS_PER_SEC    = 1000000000
+
+def avg(total, n):
+    return total / n
+
+def nsecs(secs, nsecs):
+    return secs * NSECS_PER_SEC + nsecs
+
+def nsecs_secs(nsecs):
+    return nsecs / NSECS_PER_SEC
+
+def nsecs_nsecs(nsecs):
+    return nsecs % NSECS_PER_SEC
+
+def nsecs_str(nsecs):
+    str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
+    return str
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
new file mode 100644
index 000000000000..d402f64f9b46
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,576 @@
+/*
+ * trace-event-python.  Feed trace events to an embedded Python interpreter.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <Python.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "../../perf.h"
+#include "../util.h"
+#include "../trace-event.h"
+
+PyMODINIT_FUNC initperf_trace_context(void);
+
+#define FTRACE_MAX_EVENT				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+struct event *events[FTRACE_MAX_EVENT];
+
+#define MAX_FIELDS	64
+#define N_COMMON_FIELDS	7
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static PyObject *main_module, *main_dict;
+
+static void handler_call_die(const char *handler_name)
+{
+	PyErr_Print();
+	Py_FatalError("problem in Python trace event handler");
+}
+
+static void define_value(enum print_arg_type field_type,
+			 const char *ev_name,
+			 const char *field_name,
+			 const char *field_value,
+			 const char *field_str)
+{
+	const char *handler_name = "define_flag_value";
+	PyObject *handler, *t, *retval;
+	unsigned long long value;
+	unsigned n = 0;
+
+	if (field_type == PRINT_SYMBOL)
+		handler_name = "define_symbolic_value";
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	value = eval_flag(field_value);
+
+	PyTuple_SetItem(t, n++, PyString_FromString(ev_name));
+	PyTuple_SetItem(t, n++, PyString_FromString(field_name));
+	PyTuple_SetItem(t, n++, PyInt_FromLong(value));
+	PyTuple_SetItem(t, n++, PyString_FromString(field_str));
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	handler = PyDict_GetItemString(main_dict, handler_name);
+	if (handler && PyCallable_Check(handler)) {
+		retval = PyObject_CallObject(handler, t);
+		if (retval == NULL)
+			handler_call_die(handler_name);
+	}
+
+	Py_DECREF(t);
+}
+
+static void define_values(enum print_arg_type field_type,
+			  struct print_flag_sym *field,
+			  const char *ev_name,
+			  const char *field_name)
+{
+	define_value(field_type, ev_name, field_name, field->value,
+		     field->str);
+
+	if (field->next)
+		define_values(field_type, field->next, ev_name, field_name);
+}
+
+static void define_field(enum print_arg_type field_type,
+			 const char *ev_name,
+			 const char *field_name,
+			 const char *delim)
+{
+	const char *handler_name = "define_flag_field";
+	PyObject *handler, *t, *retval;
+	unsigned n = 0;
+
+	if (field_type == PRINT_SYMBOL)
+		handler_name = "define_symbolic_field";
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	PyTuple_SetItem(t, n++, PyString_FromString(ev_name));
+	PyTuple_SetItem(t, n++, PyString_FromString(field_name));
+	if (field_type == PRINT_FLAGS)
+		PyTuple_SetItem(t, n++, PyString_FromString(delim));
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	handler = PyDict_GetItemString(main_dict, handler_name);
+	if (handler && PyCallable_Check(handler)) {
+		retval = PyObject_CallObject(handler, t);
+		if (retval == NULL)
+			handler_call_die(handler_name);
+	}
+
+	Py_DECREF(t);
+}
+
+static void define_event_symbols(struct event *event,
+				 const char *ev_name,
+				 struct print_arg *args)
+{
+	switch (args->type) {
+	case PRINT_NULL:
+		break;
+	case PRINT_ATOM:
+		define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
+			     args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case PRINT_FIELD:
+		if (cur_field_name)
+			free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_field(PRINT_FLAGS, ev_name, cur_field_name,
+			     args->flags.delim);
+		define_values(PRINT_FLAGS, args->flags.flags, ev_name,
+			      cur_field_name);
+		break;
+	case PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
+			      cur_field_name);
+		break;
+	case PRINT_STRING:
+		break;
+	case PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	default:
+		/* we should warn... */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+static inline struct event *find_cache_event(int type)
+{
+	static char ev_name[256];
+	struct event *event;
+
+	if (events[type])
+		return events[type];
+
+	events[type] = event = trace_find_event(type);
+	if (!event)
+		return NULL;
+
+	sprintf(ev_name, "%s__%s", event->system, event->name);
+
+	define_event_symbols(event, ev_name, event->print_fmt.args);
+
+	return event;
+}
+
+static void python_process_event(int cpu, void *data,
+				 int size __unused,
+				 unsigned long long nsecs, char *comm)
+{
+	PyObject *handler, *retval, *context, *t;
+	static char handler_name[256];
+	struct format_field *field;
+	unsigned long long val;
+	unsigned long s, ns;
+	struct event *event;
+	unsigned n = 0;
+	int type;
+	int pid;
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	type = trace_parse_common_type(data);
+
+	event = find_cache_event(type);
+	if (!event)
+		die("ug! no event found for type %d", type);
+
+	pid = trace_parse_common_pid(data);
+
+	sprintf(handler_name, "%s__%s", event->system, event->name);
+
+	s = nsecs / NSECS_PER_SEC;
+	ns = nsecs - s * NSECS_PER_SEC;
+
+	scripting_context->event_data = data;
+
+	context = PyCObject_FromVoidPtr(scripting_context, NULL);
+
+	PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
+	PyTuple_SetItem(t, n++,
+			PyCObject_FromVoidPtr(scripting_context, NULL));
+	PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
+	PyTuple_SetItem(t, n++, PyInt_FromLong(s));
+	PyTuple_SetItem(t, n++, PyInt_FromLong(ns));
+	PyTuple_SetItem(t, n++, PyInt_FromLong(pid));
+	PyTuple_SetItem(t, n++, PyString_FromString(comm));
+
+	for (field = event->format.fields; field; field = field->next) {
+		if (field->flags & FIELD_IS_STRING) {
+			int offset;
+			if (field->flags & FIELD_IS_DYNAMIC) {
+				offset = *(int *)(data + field->offset);
+				offset &= 0xffff;
+			} else
+				offset = field->offset;
+			PyTuple_SetItem(t, n++,
+				PyString_FromString((char *)data + offset));
+		} else { /* FIELD_IS_NUMERIC */
+			val = read_size(data + field->offset, field->size);
+			if (field->flags & FIELD_IS_SIGNED) {
+				PyTuple_SetItem(t, n++, PyInt_FromLong(val));
+			} else {
+				PyTuple_SetItem(t, n++, PyInt_FromLong(val));
+			}
+		}
+	}
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	handler = PyDict_GetItemString(main_dict, handler_name);
+	if (handler && PyCallable_Check(handler)) {
+		retval = PyObject_CallObject(handler, t);
+		if (retval == NULL)
+			handler_call_die(handler_name);
+	} else {
+		handler = PyDict_GetItemString(main_dict, "trace_unhandled");
+		if (handler && PyCallable_Check(handler)) {
+			if (_PyTuple_Resize(&t, N_COMMON_FIELDS) == -1)
+				Py_FatalError("error resizing Python tuple");
+
+			retval = PyObject_CallObject(handler, t);
+			if (retval == NULL)
+				handler_call_die("trace_unhandled");
+		}
+	}
+
+	Py_DECREF(t);
+}
+
+static int run_start_sub(void)
+{
+	PyObject *handler, *retval;
+	int err = 0;
+
+	main_module = PyImport_AddModule("__main__");
+	if (main_module == NULL)
+		return -1;
+	Py_INCREF(main_module);
+
+	main_dict = PyModule_GetDict(main_module);
+	if (main_dict == NULL) {
+		err = -1;
+		goto error;
+	}
+	Py_INCREF(main_dict);
+
+	handler = PyDict_GetItemString(main_dict, "trace_begin");
+	if (handler == NULL || !PyCallable_Check(handler))
+		goto out;
+
+	retval = PyObject_CallObject(handler, NULL);
+	if (retval == NULL)
+		handler_call_die("trace_begin");
+
+	Py_DECREF(retval);
+	return err;
+error:
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+out:
+	return err;
+}
+
+/*
+ * Start trace script
+ */
+static int python_start_script(const char *script, int argc, const char **argv)
+{
+	const char **command_line;
+	char buf[PATH_MAX];
+	int i, err = 0;
+	FILE *fp;
+
+	command_line = malloc((argc + 1) * sizeof(const char *));
+	command_line[0] = script;
+	for (i = 1; i < argc + 1; i++)
+		command_line[i] = argv[i - 1];
+
+	Py_Initialize();
+
+	initperf_trace_context();
+
+	PySys_SetArgv(argc + 1, (char **)command_line);
+
+	fp = fopen(script, "r");
+	if (!fp) {
+		sprintf(buf, "Can't open python script \"%s\"", script);
+		perror(buf);
+		err = -1;
+		goto error;
+	}
+
+	err = PyRun_SimpleFile(fp, script);
+	if (err) {
+		fprintf(stderr, "Error running python script %s\n", script);
+		goto error;
+	}
+
+	err = run_start_sub();
+	if (err) {
+		fprintf(stderr, "Error starting python script %s\n", script);
+		goto error;
+	}
+
+	free(command_line);
+	fprintf(stderr, "perf trace started with Python script %s\n\n",
+		script);
+
+	return err;
+error:
+	Py_Finalize();
+	free(command_line);
+
+	return err;
+}
+
+/*
+ * Stop trace script
+ */
+static int python_stop_script(void)
+{
+	PyObject *handler, *retval;
+	int err = 0;
+
+	handler = PyDict_GetItemString(main_dict, "trace_end");
+	if (handler == NULL || !PyCallable_Check(handler))
+		goto out;
+
+	retval = PyObject_CallObject(handler, NULL);
+	if (retval == NULL)
+		handler_call_die("trace_end");
+	else
+		Py_DECREF(retval);
+out:
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+	Py_Finalize();
+
+	fprintf(stderr, "\nperf trace Python script stopped\n");
+
+	return err;
+}
+
+static int python_generate_script(const char *outfile)
+{
+	struct event *event = NULL;
+	struct format_field *f;
+	char fname[PATH_MAX];
+	int not_first, count;
+	FILE *ofp;
+
+	sprintf(fname, "%s.py", outfile);
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+	fprintf(ofp, "# perf trace event handlers, "
+		"generated by perf trace -g python\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Python functions of the form "
+		"common_*(context).\n");
+
+	fprintf(ofp, "# See the perf-trace-python Documentation for the list "
+		"of available functions.\n\n");
+
+	fprintf(ofp, "import os\n");
+	fprintf(ofp, "import sys\n\n");
+
+	fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
+	fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
+	fprintf(ofp, "\nfrom perf_trace_context import *\n");
+	fprintf(ofp, "from Core import *\n\n\n");
+
+	fprintf(ofp, "def trace_begin():\n");
+	fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
+
+	fprintf(ofp, "def trace_end():\n");
+	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
+
+	while ((event = trace_find_next_event(event))) {
+		fprintf(ofp, "def %s__%s(", event->system, event->name);
+		fprintf(ofp, "event_name, ");
+		fprintf(ofp, "context, ");
+		fprintf(ofp, "common_cpu,\n");
+		fprintf(ofp, "\tcommon_secs, ");
+		fprintf(ofp, "common_nsecs, ");
+		fprintf(ofp, "common_pid, ");
+		fprintf(ofp, "common_comm,\n\t");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t");
+
+			fprintf(ofp, "%s", f->name);
+		}
+		fprintf(ofp, "):\n");
+
+		fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+			"common_secs, common_nsecs,\n\t\t\t"
+			"common_pid, common_comm)\n\n");
+
+		fprintf(ofp, "\t\tprint \"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 3 == 0) {
+				fprintf(ofp, "\" \\\n\t\t\"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & FIELD_IS_STRING ||
+			    f->flags & FIELD_IS_FLAG ||
+			    f->flags & FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\\n\" %% \\\n\t\t(");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t\t");
+
+			if (f->flags & FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else
+				fprintf(ofp, "%s", f->name);
+		}
+
+		fprintf(ofp, "),\n\n");
+	}
+
+	fprintf(ofp, "def trace_unhandled(event_name, context, "
+		"common_cpu, common_secs, common_nsecs,\n\t\t"
+		"common_pid, common_comm):\n");
+
+	fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+		"common_secs, common_nsecs,\n\t\tcommon_pid, "
+		"common_comm)\n\n");
+
+	fprintf(ofp, "def print_header("
+		"event_name, cpu, secs, nsecs, pid, comm):\n"
+		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm),\n");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Python script: %s\n", fname);
+
+	return 0;
+}
+
+struct scripting_ops python_scripting_ops = {
+	.name = "Python",
+	.start_script = python_start_script,
+	.stop_script = python_stop_script,
+	.process_event = python_process_event,
+	.generate_script = python_generate_script,
+};
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 9e371965c034..7ea983acfaea 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -44,6 +44,67 @@ static void process_event_unsupported(int cpu __unused,
 {
 }
 
+static void print_python_unsupported_msg(void)
+{
+	fprintf(stderr, "Python scripting not supported."
+		"  Install libpython and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install python-dev (ubuntu)"
+		"\n  # yum install python-devel (Fedora)"
+		"\n  etc.\n");
+}
+
+static int python_start_script_unsupported(const char *script __unused,
+					   int argc __unused,
+					   const char **argv __unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+static int python_generate_script_unsupported(const char *outfile __unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+struct scripting_ops python_scripting_unsupported_ops = {
+	.name = "Python",
+	.start_script = python_start_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = python_generate_script_unsupported,
+};
+
+static void register_python_scripting(struct scripting_ops *scripting_ops)
+{
+	int err;
+	err = script_spec_register("Python", scripting_ops);
+	if (err)
+		die("error registering Python script extension");
+
+	err = script_spec_register("py", scripting_ops);
+	if (err)
+		die("error registering py script extension");
+
+	scripting_context = malloc(sizeof(struct scripting_context));
+}
+
+#ifdef NO_LIBPYTHON
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_unsupported_ops);
+}
+#else
+struct scripting_ops python_scripting_ops;
+
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_ops);
+}
+#endif
+
 static void print_perl_unsupported_msg(void)
 {
 	fprintf(stderr, "Perl scripting not supported."
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index aaf2da2d21e5..c3269b937db4 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -280,6 +280,7 @@ struct scripting_ops {
 int script_spec_register(const char *spec, struct scripting_ops *ops);
 
 void setup_perl_scripting(void);
+void setup_python_scripting(void);
 
 struct scripting_context {
 	void *event_data;

From 4d161f0360d00d46a89827b3fd6da395f00c5d90 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:27:58 -0600
Subject: [PATCH 600/640] perf/scripts: Add syscall tracing scripts

Adds a set of scripts that aggregate system call totals and system
call errors.  Most are Python scripts that also test basic
functionality of the new Python engine, but there's also one Perl
script added for comparison and for reference in some new
Documentation contained in a later patch.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-8-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/Makefile                           |  3 +
 .../scripts/perl/bin/check-perf-trace-record  |  2 +-
 .../scripts/perl/bin/failed-syscalls-record   |  2 +
 .../scripts/perl/bin/failed-syscalls-report   |  4 +
 tools/perf/scripts/perl/failed-syscalls.pl    | 38 +++++++++
 .../python/bin/failed-syscalls-by-pid-record  |  2 +
 .../python/bin/failed-syscalls-by-pid-report  |  4 +
 .../python/bin/syscall-counts-by-pid-record   |  2 +
 .../python/bin/syscall-counts-by-pid-report   |  4 +
 .../scripts/python/bin/syscall-counts-record  |  2 +
 .../scripts/python/bin/syscall-counts-report  |  4 +
 tools/perf/scripts/python/check-perf-trace.py | 83 +++++++++++++++++++
 .../scripts/python/failed-syscalls-by-pid.py  | 68 +++++++++++++++
 .../scripts/python/syscall-counts-by-pid.py   | 64 ++++++++++++++
 tools/perf/scripts/python/syscall-counts.py   | 58 +++++++++++++
 15 files changed, 339 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/scripts/perl/bin/failed-syscalls-record
 create mode 100644 tools/perf/scripts/perl/bin/failed-syscalls-report
 create mode 100644 tools/perf/scripts/perl/failed-syscalls.pl
 create mode 100644 tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
 create mode 100644 tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
 create mode 100644 tools/perf/scripts/python/bin/syscall-counts-by-pid-record
 create mode 100644 tools/perf/scripts/python/bin/syscall-counts-by-pid-report
 create mode 100644 tools/perf/scripts/python/bin/syscall-counts-record
 create mode 100644 tools/perf/scripts/python/bin/syscall-counts-report
 create mode 100644 tools/perf/scripts/python/check-perf-trace.py
 create mode 100644 tools/perf/scripts/python/failed-syscalls-by-pid.py
 create mode 100644 tools/perf/scripts/python/syscall-counts-by-pid.py
 create mode 100644 tools/perf/scripts/python/syscall-counts.py

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 14273164db04..54a5b50ff312 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1032,7 +1032,10 @@ install: all
 	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
 	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
 	$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
+	$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
 
 ifdef BUILT_INS
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
index 3c1574498942..e6cb1474f8e8 100644
--- a/tools/perf/scripts/perl/bin/check-perf-trace-record
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry
+perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record
new file mode 100644
index 000000000000..f8885d389e6f
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
new file mode 100644
index 000000000000..8bfc660e5056
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: system-wide failed syscalls
+# args: [comm]
+perf trace -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $1
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
new file mode 100644
index 000000000000..c18e7e27a84b
--- /dev/null
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -0,0 +1,38 @@
+# failed system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+my %failed_syscalls;
+
+sub raw_syscalls::sys_exit
+{
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm,
+	    $id, $ret) = @_;
+
+	if ($ret < 0) {
+	    $failed_syscalls{$common_comm}++;
+	}
+}
+
+sub trace_end
+{
+    printf("\nfailed syscalls by comm:\n\n");
+
+    printf("%-20s  %10s\n", "comm", "# errors");
+    printf("%-20s  %6s  %10s\n", "--------------------", "----------");
+
+    foreach my $comm (sort {$failed_syscalls{$b} <=> $failed_syscalls{$a}}
+		      keys %failed_syscalls) {
+	    printf("%-20s  %10s\n", $comm, $failed_syscalls{$comm});
+    }
+}
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
new file mode 100644
index 000000000000..f8885d389e6f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
new file mode 100644
index 000000000000..1e0c0a860c87
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: system-wide failed syscalls, by pid
+# args: [comm]
+perf trace -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $1
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
new file mode 100644
index 000000000000..45a8c50359da
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
new file mode 100644
index 000000000000..f8044d192271
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: system-wide syscall counts, by pid
+# args: [comm]
+perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $1
diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record
new file mode 100644
index 000000000000..45a8c50359da
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
new file mode 100644
index 000000000000..a366aa61612f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: system-wide syscall counts
+# args: [comm]
+perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py $1
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
new file mode 100644
index 000000000000..964d934395ff
--- /dev/null
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -0,0 +1,83 @@
+# perf trace event handlers, generated by perf trace -g python
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc.  Basically, if this script runs successfully and
+# displays expected results, Python scripting support should be ok.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from Core import *
+from perf_trace_context import *
+
+unhandled = autodict()
+
+def trace_begin():
+	print "trace_begin"
+	pass
+
+def trace_end():
+        print_unhandled()
+
+def irq__softirq_entry(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	vec):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+                print_uncommon(context)
+
+		print "vec=%s\n" % \
+		(symbol_str("irq__softirq_entry", "vec", vec)),
+
+def kmem__kmalloc(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	call_site, ptr, bytes_req, bytes_alloc,
+	gfp_flags):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+                print_uncommon(context)
+
+		print "call_site=%u, ptr=%u, bytes_req=%u, " \
+		"bytes_alloc=%u, gfp_flags=%s\n" % \
+		(call_site, ptr, bytes_req, bytes_alloc,
+
+		flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)),
+
+def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
+		common_pid, common_comm):
+    try:
+        unhandled[event_name] += 1
+    except TypeError:
+        unhandled[event_name] = 1
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+	print "%-20s %5u %05u.%09u %8u %-20s " % \
+	(event_name, cpu, secs, nsecs, pid, comm),
+
+# print trace fields not included in handler args
+def print_uncommon(context):
+    print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \
+        % (common_pc(context), trace_flag_str(common_flags(context)), \
+               common_lock_depth(context))
+
+def print_unhandled():
+    keys = unhandled.keys()
+    if not keys:
+        return
+
+    print "\nunhandled events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for event_name in keys:
+	print "%-40s  %10d\n" % (event_name, unhandled[event_name])
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
new file mode 100644
index 000000000000..0ca02278fe69
--- /dev/null
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -0,0 +1,68 @@
+# failed system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals, broken down by pid.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+
+for_comm = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+if len(sys.argv) > 1:
+	for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	pass
+
+def trace_end():
+	print_error_totals()
+
+def raw_syscalls__sys_exit(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, ret):
+	if for_comm is not None:
+		if common_comm != for_comm:
+			return
+
+	if ret < 0:
+		try:
+			syscalls[common_comm][common_pid][id][ret] += 1
+		except TypeError:
+			syscalls[common_comm][common_pid][id][ret] = 1
+
+def print_error_totals():
+    if for_comm is not None:
+	    print "\nsyscall errors for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall errors:\n\n",
+
+    print "%-30s  %10s\n" % ("comm [pid]", "count"),
+    print "%-30s  %10s\n" % ("------------------------------", \
+                                 "----------"),
+
+    comm_keys = syscalls.keys()
+    for comm in comm_keys:
+	    pid_keys = syscalls[comm].keys()
+	    for pid in pid_keys:
+		    print "\n%s [%d]\n" % (comm, pid),
+		    id_keys = syscalls[comm][pid].keys()
+		    for id in id_keys:
+			    print "  syscall: %-16d\n" % (id),
+			    ret_keys = syscalls[comm][pid][id].keys()
+			    for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k),  reverse = True):
+				    print "    err = %-20d  %10d\n" % (ret, val),
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
new file mode 100644
index 000000000000..af722d6a4b3f
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -0,0 +1,64 @@
+# system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by syscall.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+
+for_comm = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+if len(sys.argv) > 1:
+	for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	pass
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	if for_comm is not None:
+		if common_comm != for_comm:
+			return
+	try:
+		syscalls[common_comm][common_pid][id] += 1
+	except TypeError:
+		syscalls[common_comm][common_pid][id] = 1
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events by comm/pid:\n\n",
+
+    print "%-40s  %10s\n" % ("comm [pid]/syscalls", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "----------"),
+
+    comm_keys = syscalls.keys()
+    for comm in comm_keys:
+	    pid_keys = syscalls[comm].keys()
+	    for pid in pid_keys:
+		    print "\n%s [%d]\n" % (comm, pid),
+		    id_keys = syscalls[comm][pid].keys()
+		    for id, val in sorted(syscalls[comm][pid].iteritems(), \
+				  key = lambda(k, v): (v, k),  reverse = True):
+			    print "  %-38d  %10d\n" % (id, val),
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
new file mode 100644
index 000000000000..f977e85ff049
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -0,0 +1,58 @@
+# system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by syscall.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+usage = "perf trace -s syscall-counts.py [comm]\n";
+
+for_comm = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+if len(sys.argv) > 1:
+	for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	pass
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	if for_comm is not None:
+		if common_comm != for_comm:
+			return
+	try:
+		syscalls[id] += 1
+	except TypeError:
+		syscalls[id] = 1
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+				  reverse = True):
+	    print "%-40d  %10d\n" % (id, val),

From 44ad9cd8f0893b9ae0ac729a7dc2a1ebcd170ac6 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Mon, 22 Feb 2010 01:12:59 -0600
Subject: [PATCH 601/640] perf/scripts: Remove unnecessary PyTuple resizes

If we know the size of a tuple in advance, there's no need to resize
it - start out with the known size in the first place.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1266822779.6426.4.camel@tropicana>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 .../util/scripting-engines/trace-event-python.c     | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index d402f64f9b46..33a414bbba3e 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -68,7 +68,7 @@ static void define_value(enum print_arg_type field_type,
 	if (field_type == PRINT_SYMBOL)
 		handler_name = "define_symbolic_value";
 
-	t = PyTuple_New(MAX_FIELDS);
+	t = PyTuple_New(4);
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
@@ -79,9 +79,6 @@ static void define_value(enum print_arg_type field_type,
 	PyTuple_SetItem(t, n++, PyInt_FromLong(value));
 	PyTuple_SetItem(t, n++, PyString_FromString(field_str));
 
-	if (_PyTuple_Resize(&t, n) == -1)
-		Py_FatalError("error resizing Python tuple");
-
 	handler = PyDict_GetItemString(main_dict, handler_name);
 	if (handler && PyCallable_Check(handler)) {
 		retval = PyObject_CallObject(handler, t);
@@ -116,7 +113,10 @@ static void define_field(enum print_arg_type field_type,
 	if (field_type == PRINT_SYMBOL)
 		handler_name = "define_symbolic_field";
 
-	t = PyTuple_New(MAX_FIELDS);
+	if (field_type == PRINT_FLAGS)
+		t = PyTuple_New(3);
+	else
+		t = PyTuple_New(2);
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
@@ -125,9 +125,6 @@ static void define_field(enum print_arg_type field_type,
 	if (field_type == PRINT_FLAGS)
 		PyTuple_SetItem(t, n++, PyString_FromString(delim));
 
-	if (_PyTuple_Resize(&t, n) == -1)
-		Py_FatalError("error resizing Python tuple");
-
 	handler = PyDict_GetItemString(main_dict, handler_name);
 	if (handler && PyCallable_Check(handler)) {
 		retval = PyObject_CallObject(handler, t);

From cff68e582237cae3cf456f01153202175961dfbe Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 27 Jan 2010 02:28:03 -0600
Subject: [PATCH 602/640] perf/scripts: Add perf-trace-python Documentation

Also small update to perf-trace-perl and perf-trace docs.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1264580883-15324-13-git-send-email-tzanussi@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 tools/perf/Documentation/perf-trace-perl.txt  |   3 +-
 .../perf/Documentation/perf-trace-python.txt  | 624 ++++++++++++++++++
 tools/perf/Documentation/perf-trace.txt       |  11 +-
 3 files changed, 636 insertions(+), 2 deletions(-)
 create mode 100644 tools/perf/Documentation/perf-trace-python.txt

diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
index c5f55f439091..d2206c3c7aa6 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -8,7 +8,7 @@ perf-trace-perl - Process trace data with a Perl script
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [lang]:script[.ext] ]
+'perf trace' [-s [Perl]:script[.pl] ]
 
 DESCRIPTION
 -----------
@@ -213,6 +213,7 @@ Various utility functions for use with perf trace:
   nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
   nsecs_str($nsecs) - returns printable string in the form secs.nsecs
   avg($total, $n) - returns average given a sum and a total number of values
+  syscall_name($id) - returns the syscall name for the specified syscall_nr
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt
new file mode 100644
index 000000000000..119d5deba1db
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace-python.txt
@@ -0,0 +1,624 @@
+perf-trace-python(1)
+==================
+
+NAME
+----
+perf-trace-python - Process trace data with a Python script
+
+SYNOPSIS
+--------
+[verse]
+'perf trace' [-s [Python]:script[.py] ]
+
+DESCRIPTION
+-----------
+
+This perf trace option is used to process perf trace data using perf's
+built-in Python interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Python script, if any.
+
+A QUICK EXAMPLE
+---------------
+
+This section shows the process, start to finish, of creating a working
+Python script that aggregates and extracts useful information from a
+raw perf trace stream.  You can avoid reading the rest of this
+document if an example is enough for you; the rest of the document
+provides more details on each step and lists the library functions
+available to script writers.
+
+This example actually details the steps that were used to create the
+'syscall-counts' script you see when you list the available perf trace
+scripts via 'perf trace -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf trace'
+scripts listed by that command.
+
+The syscall-counts script is a simple script, but demonstrates all the
+basic ideas necessary to create a useful script.  Here's an example
+of its output:
+
+----
+syscall events:
+
+event                                          count
+----------------------------------------  -----------
+sys_write                                     455067
+sys_getdents                                    4072
+sys_close                                       3037
+sys_swapoff                                     1769
+sys_read                                         923
+sys_sched_setparam                               826
+sys_open                                         331
+sys_newfstat                                     326
+sys_mmap                                         217
+sys_munmap                                       216
+sys_futex                                        141
+sys_select                                       102
+sys_poll                                          84
+sys_setitimer                                     12
+sys_writev                                         8
+15                                                 8
+sys_lseek                                          7
+sys_rt_sigprocmask                                 6
+sys_wait4                                          3
+sys_ioctl                                          3
+sys_set_robust_list                                1
+sys_exit                                           1
+56                                                 1
+sys_access                                         1
+----
+
+Basically our task is to keep a per-syscall tally that gets updated
+every time a system call occurs in the system.  Our script will do
+that, but first we need to record the data that will be processed by
+that script.  Theoretically, there are a couple of ways we could do
+that:
+
+- we could enable every event under the tracing/events/syscalls
+  directory, but this is over 600 syscalls, well beyond the number
+  allowable by perf.  These individual syscall events will however be
+  useful if we want to later use the guidance we get from the
+  general-purpose scripts to drill down and get more detail about
+  individual syscalls of interest.
+
+- we can enable the sys_enter and/or sys_exit syscalls found under
+  tracing/events/raw_syscalls.  These are called for all syscalls; the
+  'id' field can be used to distinguish between individual syscall
+  numbers.
+
+For this script, we only need to know that a syscall was entered; we
+don't care how it exited, so we'll use 'perf record' to record only
+the sys_enter events:
+
+----
+# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+
+^C[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
+----
+
+The options basically say to collect data for every syscall event
+system-wide and multiplex the per-cpu output into a single stream.
+That single stream will be recorded in a file in the current directory
+called perf.data.
+
+Once we have a perf.data file containing our data, we can use the -g
+'perf trace' option to generate a Python script that will contain a
+callback handler for each event type found in the perf.data trace
+stream (for more details, see the STARTER SCRIPTS section).
+
+----
+# perf trace -g python
+generated Python script: perf-trace.py
+
+The output file created also in the current directory is named
+perf-trace.py.  Here's the file in its entirety:
+
+# perf trace event handlers, generated by perf trace -g python
+# Licensed under the terms of the GNU GPL License version 2
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the format files.  Those fields not available as handler params can
+# be retrieved using Python functions of the form common_*(context).
+# See the perf-trace-python Documentation for the list of available functions.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_begin():
+	print "in trace_begin"
+
+def trace_end():
+	print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+		print "id=%d, args=%s\n" % \
+		(id, args),
+
+def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
+		common_pid, common_comm):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+		common_pid, common_comm)
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+	print "%-20s %5u %05u.%09u %8u %-20s " % \
+	(event_name, cpu, secs, nsecs, pid, comm),
+----
+
+At the top is a comment block followed by some import statements and a
+path append which every perf trace script should include.
+
+Following that are a couple generated functions, trace_begin() and
+trace_end(), which are called at the beginning and the end of the
+script respectively (for more details, see the SCRIPT_LAYOUT section
+below).
+
+Following those are the 'event handler' functions generated one for
+every event in the 'perf record' output.  The handler functions take
+the form subsystem__event_name, and contain named parameters, one for
+each field in the event; in this case, there's only one event,
+raw_syscalls__sys_enter().  (see the EVENT HANDLERS section below for
+more info on event handlers).
+
+The final couple of functions are, like the begin and end functions,
+generated for every script.  The first, trace_unhandled(), is called
+every time the script finds an event in the perf.data file that
+doesn't correspond to any event handler in the script.  This could
+mean either that the record step recorded event types that it wasn't
+really interested in, or the script was run against a trace file that
+doesn't correspond to the script.
+
+The script generated by -g option option simply prints a line for each
+event found in the trace stream i.e. it basically just dumps the event
+and its parameter values to stdout.  The print_header() function is
+simply a utility function used for that purpose.  Let's rename the
+script and run it to see the default output:
+
+----
+# mv perf-trace.py syscall-counts.py
+# perf trace -s syscall-counts.py
+
+raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847620860     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847710478     6533 npviewer.bin          id=78, args=
+raw_syscalls__sys_enter     1 00840.847719204     6533 npviewer.bin          id=142, args=
+raw_syscalls__sys_enter     1 00840.847755445     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847775601     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847781820     6533 npviewer.bin          id=3, args=
+.
+.
+.
+----
+
+Of course, for this script, we're not interested in printing every
+trace event, but rather aggregating it in a useful way.  So we'll get
+rid of everything to do with printing as well as the trace_begin() and
+trace_unhandled() functions, which we won't be using.  That leaves us
+with this minimalistic skeleton:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_end():
+	print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+----
+
+In trace_end(), we'll simply print the results, but first we need to
+generate some results to print.  To do that we need to have our
+sys_enter() handler do the necessary tallying until all events have
+been counted.  A hash table indexed by syscall id is a good way to
+store that information; every time the sys_enter() handler is called,
+we simply increment a count associated with that hash entry indexed by
+that syscall id:
+
+----
+  syscalls = autodict()
+
+  try:
+    syscalls[id] += 1
+  except TypeError:
+    syscalls[id] = 1
+----
+
+The syscalls 'autodict' object is a special kind of Python dictionary
+(implemented in Core.py) that implements Perl's 'autovivifying' hashes
+in Python i.e. with autovivifying hashes, you can assign nested hash
+values without having to go to the trouble of creating intermediate
+levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create
+the intermediate hash levels and finally assign the value 1 to the
+hash entry for 'id' (because the value being assigned isn't a hash
+object itself, the initial value is assigned in the TypeError
+exception.  Well, there may be a better way to do this in Python but
+that's what works for now).
+
+Putting that code into the raw_syscalls__sys_enter() handler, we
+effectively end up with a single-level dictionary keyed on syscall id
+and having the counts we've tallied as values.
+
+The print_syscall_totals() function iterates over the entries in the
+dictionary and displays a line for each entry containing the syscall
+name (the dictonary keys contain the syscall ids, which are passed to
+the Util function syscall_name(), which translates the raw syscall
+numbers to the corresponding syscall name strings).  The output is
+displayed after all the events in the trace have been processed, by
+calling the print_syscall_totals() function from the trace_end()
+handler called at the end of script processing.
+
+The final script producing the output shown above is shown in its
+entirety below:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+syscalls = autodict()
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	try:
+		syscalls[id] += 1
+	except TypeError:
+		syscalls[id] = 1
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+				  reverse = True):
+	    print "%-40s  %10d\n" % (syscall_name(id), val),
+----
+
+The script can be run just as before:
+
+  # perf trace -s syscall-counts.py
+
+So those are the essential steps in writing and running a script.  The
+process can be generalized to any tracepoint or set of tracepoints
+you're interested in - basically find the tracepoint(s) you're
+interested in by looking at the list of available events shown by
+'perf list' and/or look in /sys/kernel/debug/tracing events for
+detailed event and field info, record the corresponding trace data
+using 'perf record', passing it the list of interesting events,
+generate a skeleton script using 'perf trace -g python' and modify the
+code to aggregate and display it for your particular needs.
+
+After you've done that you may end up with a general-purpose script
+that you want to keep around and have available for future use.  By
+writing a couple of very simple shell scripts and putting them in the
+right place, you can have your script listed alongside the other
+scripts listed by the 'perf trace -l' command e.g.:
+
+----
+root@tropicana:~# perf trace -l
+List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+----
+
+A nice side effect of doing this is that you also then capture the
+probably lengthy 'perf record' command needed to record the events for
+the script.
+
+To have the script appear as a 'built-in' script, you write two simple
+scripts, one for recording and one for 'reporting'.
+
+The 'record' script is a shell script with the same base name as your
+script, but with -record appended.  The shell script should be put
+into the perf/scripts/python/bin directory in the kernel source tree.
+In that script, you write the 'perf record' command-line needed for
+your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
+
+#!/bin/bash
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
+----
+
+The 'report' script is also a shell script with the same base name as
+your script, but with -report appended.  It should also be located in
+the perf/scripts/python/bin directory.  In that script, you write the
+'perf trace -s' command-line needed for running your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
+
+#!/bin/bash
+# description: system-wide syscall counts
+perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+----
+
+Note that the location of the Python script given in the shell script
+is in the libexec/perf-core/scripts/python directory - this is where
+the script will be copied by 'make install' when you install perf.
+For the installation to install your script there, your script needs
+to be located in the perf/scripts/python directory in the kernel
+source tree:
+
+----
+# ls -al kernel-source/tools/perf/scripts/python
+
+root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
+total 32
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
+drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
+----
+
+Once you've done that (don't forget to do a new 'make install',
+otherwise your script won't show up at run-time), 'perf trace -l'
+should show a new entry for your script:
+
+----
+root@tropicana:~# perf trace -l
+List of available trace scripts:
+  workqueue-stats                      workqueue stats (ins/exe/create/destroy)
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+  syscall-counts                       system-wide syscall counts
+----
+
+You can now perform the record step via 'perf trace record':
+
+  # perf trace record syscall-counts
+
+and display the output using 'perf trace report':
+
+  # perf trace report syscall-counts
+
+STARTER SCRIPTS
+---------------
+
+You can quickly get started writing a script for a particular set of
+trace data by generating a skeleton script using 'perf trace -g
+python' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/python for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-trace.py script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf trace is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_handled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above options: -c 1 says to sample every event, -a to enable
+system-wide collection, -M to multiplex the output, and -R to collect
+raw samples.
+
+The format file for the sched_wakep event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+        field:int common_lock_depth;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+       common_nsecs, common_pid, common_comm,
+       comm, pid, prio, success, target_cpu):
+       pass
+----
+
+The handler function takes the form subsystem__event_name.
+
+The common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ event_name 	  	    the name of the event as text
+ context		    an opaque 'cookie' used in calls back into perf
+ common_cpu		    the cpu the event occurred on
+ common_secs		    the secs portion of the event timestamp
+ common_nsecs		    the nsecs portion of the event timestamp
+ common_pid		    the pid of the current task
+ common_comm		    the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf trace Python script should start by setting up a Python
+module search path and 'import'ing a few support modules (see module
+descriptions below):
+
+----
+ import os
+ import sys
+
+ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	      '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+ from perf_trace_context import *
+ from Core import *
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+def trace_begin:
+    pass
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+def trace_end:
+    pass
+----
+
+*trace_unhandled*, if defined, is called after for any event that
+ doesn't have a handler explicitly defined for it.  The standard set
+ of common arguments are passed into it:
+
+----
+def trace_unhandled(event_name, context, common_cpu, common_secs,
+        common_nsecs, common_pid, common_comm):
+    pass
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf trace Python modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various perf trace Python modules.  To use the functions and
+variables from the given module, add the corresponding 'from XXXX
+import' line to your perf trace script.
+
+Core.py Module
+~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the flag field field_name of event event_name
+  symbol_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the symbolic field field_name of event event_name
+
+The *autodict* function returns a special special kind of Python
+dictionary that implements Perl's 'autovivifying' hashes in Python
+i.e. with autovivifying hashes, you can assign nested hash values
+without having to go to the trouble of creating intermediate levels if
+they don't exist.
+
+  autodict() - returns an autovivifying dictionary instance
+
+
+perf_trace_context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+perf_trace_context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a context variable, which is the same as the
+context variable passed into every event handler as the second
+argument.
+
+ common_pc(context) - returns common_preempt count for the current event
+ common_flags(context) - returns common_flags for the current event
+ common_lock_depth(context) - returns common_lock_depth for the current event
+
+Util.py Module
+~~~~~~~~~~~~~~
+
+Various utility functions for use with perf trace:
+
+  nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs(nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
+  nsecs_str(nsecs) - returns printable string in the form secs.nsecs
+  avg(total, n) - returns average given a sum and a total number of values
+  syscall_name(id) - returns the syscall name for the specified syscall_nr
+
+SEE ALSO
+--------
+linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index c00a76fcb8d6..8879299cd9df 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -19,6 +19,11 @@ There are several variants of perf trace:
   'perf trace' to see a detailed trace of the workload that was
   recorded.
 
+  You can also run a set of pre-canned scripts that aggregate and
+  summarize the raw trace data in various ways (the list of scripts is
+  available via 'perf trace -l').  The following variants allow you to
+  record and run those scripts:
+
   'perf trace record <script>' to record the events required for 'perf
   trace report'.  <script> is the name displayed in the output of
   'perf trace --list' i.e. the actual script name minus any language
@@ -31,6 +36,9 @@ There are several variants of perf trace:
   record <script>' is used and should be present for this command to
   succeed.
 
+  See the 'SEE ALSO' section for links to language-specific
+  information on how to write and run your own trace scripts.
+
 OPTIONS
 -------
 -D::
@@ -58,4 +66,5 @@ OPTIONS
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1]
+linkperf:perf-record[1], linkperf:perf-trace-perl[1],
+linkperf:perf-trace-python[1]

From c2fbaa4b489b738cd9e7666edfeceaaa38e71c9f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 25 Feb 2010 03:03:52 +0100
Subject: [PATCH 603/640] perf/scripts: Tag syscall_name helper as not yet
 available

syscall_name() helper, which resolves a syscall arch number to
its name, is not yet available as we first need to implement
event injection for it to work.

Remove it from the documentation or tag its references as
unavailable yet. Once it's implemented, we can just revert
the current patch.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Keiichi KII <k-keiichi@bx.jp.nec.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace-perl.txt   | 1 -
 tools/perf/Documentation/perf-trace-python.txt | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
index d2206c3c7aa6..d729cee8d987 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -213,7 +213,6 @@ Various utility functions for use with perf trace:
   nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
   nsecs_str($nsecs) - returns printable string in the form secs.nsecs
   avg($total, $n) - returns average given a sum and a total number of values
-  syscall_name($id) - returns the syscall name for the specified syscall_nr
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt
index 119d5deba1db..a241aca77184 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-trace-python.txt
@@ -36,7 +36,8 @@ scripts listed by that command.
 
 The syscall-counts script is a simple script, but demonstrates all the
 basic ideas necessary to create a useful script.  Here's an example
-of its output:
+of its output (syscall names are not yet supported, they will appear
+as numbers):
 
 ----
 syscall events:
@@ -270,7 +271,8 @@ calling the print_syscall_totals() function from the trace_end()
 handler called at the end of script processing.
 
 The final script producing the output shown above is shown in its
-entirety below:
+entirety below (syscall_name() helper is not yet available, you can
+only deal with id's for now):
 
 ----
 import os
@@ -617,7 +619,6 @@ Various utility functions for use with perf trace:
   nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
   nsecs_str(nsecs) - returns printable string in the form secs.nsecs
   avg(total, n) - returns average given a sum and a total number of values
-  syscall_name(id) - returns the syscall name for the specified syscall_nr
 
 SEE ALSO
 --------

From 37fe5fcb7a5b5235c8b71bf5469ce4c7246e3fab Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Thu, 25 Feb 2010 11:00:51 +0800
Subject: [PATCH 604/640] perf symbols: Check the right return variable

In function dso__split_kallsyms(), curr_map saves the return value
of map__new2. So check it instead of var map after the call returns.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: <stable@kernel.org> # for .33.x
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1267066851.1726.9.camel@localhost>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ee9c37efdd36..320b15178e95 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -527,7 +527,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
 				return -1;
 
 			curr_map = map__new2(pos->start, dso, map->type);
-			if (map == NULL) {
+			if (curr_map == NULL) {
 				dso__delete(dso);
 				return -1;
 			}

From c7ad21af2c8b7accb893a576b100296c61c5d610 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 22 Feb 2010 16:14:22 -0300
Subject: [PATCH 605/640] perf top: Use a macro instead of a constant variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To overcome a silly gcc warning:

 cc1: warnings being treated as errors
 builtin-top.c: In function ‘lookup_sym_source’:
 builtin-top.c:291: warning: not protecting local variables:
 variable length buffer make: *** [builtin-top.o] Error 1
 make: *** Waiting for unfinished jobs....

That is emitted for this:

	const size_t pattern_len = BITS_PER_LONG / 4 + 2;
	char pattern[pattern_len + 1];

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1266866062-6287-1-git-send-email-acme@infradead.org>
[ -v2: macroify the naming style ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-top.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c72ab50d65ca..c6706984b7b3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -287,19 +287,20 @@ out_unlock:
 	pthread_mutex_unlock(&syme->src->lock);
 }
 
+#define PATTERN_LEN		(BITS_PER_LONG / 4 + 2)
+
 static void lookup_sym_source(struct sym_entry *syme)
 {
 	struct symbol *symbol = sym_entry__symbol(syme);
 	struct source_line *line;
-	const size_t pattern_len = BITS_PER_LONG / 4 + 2;
-	char pattern[pattern_len + 1];
+	char pattern[PATTERN_LEN + 1];
 
 	sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
 		map__rip_2objdump(syme->map, symbol->start));
 
 	pthread_mutex_lock(&syme->src->lock);
 	for (line = syme->src->lines; line; line = line->next) {
-		if (memcmp(line->line, pattern, pattern_len) == 0) {
+		if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
 			syme->src->source = line;
 			break;
 		}

From 3846df2e0a99a2bf10023de0e9c1496592012d4c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 22 Feb 2010 16:15:39 -0300
Subject: [PATCH 606/640] perf symbols: Improve debugging information about
 symtab origins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Be more clear about DSO long names and tell from which file
kernel symbols were obtained, all in --verbose mode:

    [root@mica ~]# perf report -v > /dev/null
    Looking at the vmlinux_path (5 entries long)
    Using /lib/modules/2.6.33-rc8-tip-00777-g0918527-dirty/build/vmlinux for symbols
    [root@mica ~]# mv /lib/modules/2.6.33-rc8-tip-00777-g0918527-dirty/build/vmlinux /tmp/dd
    [root@mica ~]# perf report -v > /dev/null
    Looking at the vmlinux_path (5 entries long)
    Using /proc/kallsyms for symbols
    [root@mica ~]#

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1266866139-6361-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/map.c    |  5 +++++
 tools/perf/util/map.h    |  2 ++
 tools/perf/util/symbol.c | 16 ++++++++++++++--
 tools/perf/util/thread.c |  5 -----
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 138e3cb2b727..e509cd59c67d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -5,6 +5,11 @@
 #include <stdio.h>
 #include "debug.h"
 
+const char *map_type__name[MAP__NR_TYPES] = {
+	[MAP__FUNCTION] = "Functions",
+	[MAP__VARIABLE] = "Variables",
+};
+
 static inline int is_anon_memory(const char *filename)
 {
 	return strcmp(filename, "//anon") == 0;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 86f77cb1d060..b756368076c6 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -13,6 +13,8 @@ enum map_type {
 
 #define MAP__NR_TYPES (MAP__VARIABLE + 1)
 
+extern const char *map_type__name[MAP__NR_TYPES];
+
 struct dso;
 struct ref_reloc_sym;
 struct map_groups;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 320b15178e95..7aab4e5f3669 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -367,6 +367,10 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 	struct rb_node *nd;
 	size_t ret = fprintf(fp, "dso: %s (", self->short_name);
 
+	if (self->short_name != self->long_name)
+		ret += fprintf(fp, "%s, ", self->long_name);
+	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
+		       self->loaded ? "" : "NOT ");
 	ret += dso__fprintf_buildid(self, fp);
 	ret += fprintf(fp, ")\n");
 	for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
@@ -1580,6 +1584,9 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
 	err = dso__load_sym(self, map, vmlinux, fd, filter, 0);
 	close(fd);
 
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", vmlinux);
+
 	return err;
 }
 
@@ -1594,7 +1601,6 @@ int dso__load_vmlinux_path(struct dso *self, struct map *map,
 	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
 		err = dso__load_vmlinux(self, map, vmlinux_path[i], filter);
 		if (err > 0) {
-			pr_debug("Using %s for symbols\n", vmlinux_path[i]);
 			dso__set_long_name(self, strdup(vmlinux_path[i]));
 			break;
 		}
@@ -1661,12 +1667,16 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 
 		if (asprintf(&kallsyms_allocated_filename,
 			     "%s/.debug/[kernel.kallsyms]/%s",
-			     getenv("HOME"), sbuild_id) == -1)
+			     getenv("HOME"), sbuild_id) == -1) {
+			pr_err("Not enough memory for kallsyms file lookup\n");
 			return -1;
+		}
 
 		kallsyms_filename = kallsyms_allocated_filename;
 
 		if (access(kallsyms_filename, F_OK)) {
+			pr_err("No kallsyms or vmlinux with build-id %s "
+			       "was found\n", sbuild_id);
 			free(kallsyms_allocated_filename);
 			return -1;
 		}
@@ -1680,6 +1690,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 
 do_kallsyms:
 	err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	free(kallsyms_allocated_filename);
 
 out_try_fixup:
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 9e8995eaf2b6..c090654cb6c0 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -53,11 +53,6 @@ int thread__comm_len(struct thread *self)
 	return self->comm_len;
 }
 
-static const char *map_type__name[MAP__NR_TYPES] = {
-	[MAP__FUNCTION] = "Functions",
-	[MAP__VARIABLE] = "Variables",
-};
-
 static size_t __map_groups__fprintf_maps(struct map_groups *self,
 					 enum map_type type, FILE *fp)
 {

From 628ada0cb03666dd463f7c25947eaccdf440c309 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@infradead.org>
Date: Thu, 25 Feb 2010 12:57:40 -0300
Subject: [PATCH 607/640] perf annotate: Defer allocating sym_priv->hist array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because symbol->end is not fixed up at symbol_filter time, only
after all symbols for a DSO are loaded, and that, for asm
symbols, may be bogus, causing segfaults when hits happen in
these symbols.

Reported-by: David Miller <davem@davemloft.net>
Reported-by: Anton Blanchard <anton@samba.org>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: <stable@kernel.org> # for .33.x. Does not apply cleanly, needs backport.
LKML-Reference: <20100225155740.GB8553@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c | 57 ++++++++++++++++++-----------------
 tools/perf/util/symbol.c      |  2 +-
 tools/perf/util/symbol.h      |  2 ++
 3 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 28ea4e0c3658..e47dd1587e39 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -53,32 +53,20 @@ struct sym_priv {
 
 static const char *sym_hist_filter;
 
-static int symbol_filter(struct map *map __used, struct symbol *sym)
+static int sym__alloc_hist(struct symbol *self)
 {
-	if (sym_hist_filter == NULL ||
-	    strcmp(sym->name, sym_hist_filter) == 0) {
-		struct sym_priv *priv = symbol__priv(sym);
-		const int size = (sizeof(*priv->hist) +
-				  (sym->end - sym->start) * sizeof(u64));
+	struct sym_priv *priv = symbol__priv(self);
+	const int size = (sizeof(*priv->hist) +
+			  (self->end - self->start) * sizeof(u64));
 
-		priv->hist = malloc(size);
-		if (priv->hist)
-			memset(priv->hist, 0, size);
-		return 0;
-	}
-	/*
-	 * FIXME: We should really filter it out, as we don't want to go thru symbols
-	 * we're not interested, and if a DSO ends up with no symbols, delete it too,
-	 * but right now the kernel loading routines in symbol.c bail out if no symbols
-	 * are found, fix it later.
-	 */
-	return 0;
+	priv->hist = zalloc(size);
+	return priv->hist == NULL ? -1 : 0;
 }
 
 /*
  * collect histogram counts
  */
-static void hist_hit(struct hist_entry *he, u64 ip)
+static int annotate__hist_hit(struct hist_entry *he, u64 ip)
 {
 	unsigned int sym_size, offset;
 	struct symbol *sym = he->sym;
@@ -88,11 +76,11 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 	he->count++;
 
 	if (!sym || !he->map)
-		return;
+		return 0;
 
 	priv = symbol__priv(sym);
-	if (!priv->hist)
-		return;
+	if (priv->hist == NULL && sym__alloc_hist(sym) < 0)
+		return -ENOMEM;
 
 	sym_size = sym->end - sym->start;
 	offset = ip - sym->start;
@@ -100,7 +88,7 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 	pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip));
 
 	if (offset >= sym_size)
-		return;
+		return 0;
 
 	h = priv->hist;
 	h->sum++;
@@ -108,18 +96,31 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 
 	pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start,
 		  he->sym->name, ip, ip - he->sym->start, h->ip[offset]);
+	return 0;
 }
 
 static int perf_session__add_hist_entry(struct perf_session *self,
 					struct addr_location *al, u64 count)
 {
 	bool hit;
-	struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL,
-							       count, &hit);
+	struct hist_entry *he;
+
+	if (sym_hist_filter != NULL &&
+	    (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
+		/* We're only interested in a symbol named sym_hist_filter */
+		if (al->sym != NULL) {
+			rb_erase(&al->sym->rb_node,
+				 &al->map->dso->symbols[al->map->type]);
+			symbol__delete(al->sym);
+		}
+		return 0;
+	}
+
+	he = __perf_session__add_hist_entry(self, al, NULL, count, &hit);
 	if (he == NULL)
 		return -ENOMEM;
-	hist_hit(he, al->addr);
-	return 0;
+
+	return annotate__hist_hit(he, al->addr);
 }
 
 static int process_sample_event(event_t *event, struct perf_session *session)
@@ -129,7 +130,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
 		    event->ip.pid, event->ip.ip);
 
-	if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) {
+	if (event__preprocess_sample(event, session, &al, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7aab4e5f3669..323c0aea0a91 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -144,7 +144,7 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name)
 	return self;
 }
 
-static void symbol__delete(struct symbol *self)
+void symbol__delete(struct symbol *self)
 {
 	free(((void *)self) - symbol_conf.priv_size);
 }
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1b4192ee5300..280dadd32a08 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -51,6 +51,8 @@ struct symbol {
 	char		name[0];
 };
 
+void symbol__delete(struct symbol *self);
+
 struct strlist;
 
 struct symbol_conf {

From 3a0304e90aa5a2c0c308a05d28f7d109a48d8539 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 26 Feb 2010 10:33:41 +0100
Subject: [PATCH 608/640] perf_events: Report the MMAP pgoff value in bytes

DaveM reported that currently perf interprets the pgoff value reported by
the MMAP events as a byte range, but the kernel reports it as a page
offset.

Since its broken (and unusable) anyway, change the kernel behaviour (ABI)
to report bytes indeed, avoiding the need for userspace to deal with
PAGE_SIZE things.

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 087025fe3ba1..5a69abb05ac3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3749,7 +3749,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
 			/* .tid */
 			.start  = vma->vm_start,
 			.len    = vma->vm_end - vma->vm_start,
-			.pgoff  = vma->vm_pgoff,
+			.pgoff  = (u64)vma->vm_pgoff << PAGE_SHIFT,
 		},
 	};
 

From d76a0812ac4139ceb54daab3cc70e1bd8bd9d43a Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 8 Feb 2010 17:06:01 +0200
Subject: [PATCH 609/640] perf_events: Add new start/stop PMU callbacks

In certain situations, the kernel may need to stop and start the same
event rapidly. The current PMU callbacks do not distinguish between stop
and release (i.e., stop + free the resource). Thus, a counter may be
released, then it will be immediately re-acquired. Event scheduling will
again take place with no guarantee to assign the same counter. On some
processors, this may event yield to failure to assign the event back due
to competion between cores.

This patch is adding a new pair of callback to stop and restart a counter
without actually release the underlying counter resource. On stop, the
counter is stopped, its values saved and that's it. On start, the value
is reloaded and counter is restarted (on x86, actual restart is delayed
until perf_enable()).

Signed-off-by: Stephane Eranian <eranian@google.com>
[ added fallback to ->enable/->disable for all other PMUs
  fixed x86_pmu_start() to call x86_pmu.enable()
  merged __x86_pmu_disable into x86_pmu_stop() ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b703875.0a04d00a.7896.ffffb824@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++++----
 include/linux/perf_event.h       |  2 ++
 kernel/perf_event.c              | 20 ++++++++++++++++++--
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f173a220..9173ea95f918 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1495,7 +1495,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 		hwc->last_tag == cpuc->tags[i];
 }
 
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+static void x86_pmu_stop(struct perf_event *event);
 
 void hw_perf_enable(void)
 {
@@ -1533,7 +1533,7 @@ void hw_perf_enable(void)
 			    match_prev_assignment(hwc, cpuc, i))
 				continue;
 
-			__x86_pmu_disable(event, cpuc);
+			x86_pmu_stop(event);
 
 			hwc->idx = -1;
 		}
@@ -1801,6 +1801,19 @@ static int x86_pmu_enable(struct perf_event *event)
 	return 0;
 }
 
+static int x86_pmu_start(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx == -1)
+		return -EAGAIN;
+
+	x86_perf_event_set_period(event, hwc, hwc->idx);
+	x86_pmu.enable(hwc, hwc->idx);
+
+	return 0;
+}
+
 static void x86_pmu_unthrottle(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1924,8 +1937,9 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
 	event->pending_kill = POLL_IN;
 }
 
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
+static void x86_pmu_stop(struct perf_event *event)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -1954,7 +1968,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int i;
 
-	__x86_pmu_disable(event, cpuc);
+	x86_pmu_stop(event);
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i]) {
@@ -2667,6 +2681,8 @@ static inline void x86_pmu_read(struct perf_event *event)
 static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
+	.start		= x86_pmu_start,
+	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 071a7db52549..b08dfdad08cb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -513,6 +513,8 @@ struct perf_event;
 struct pmu {
 	int (*enable)			(struct perf_event *event);
 	void (*disable)			(struct perf_event *event);
+	int (*start)			(struct perf_event *event);
+	void (*stop)			(struct perf_event *event);
 	void (*read)			(struct perf_event *event);
 	void (*unthrottle)		(struct perf_event *event);
 };
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5a69abb05ac3..74c60021cdbc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1493,6 +1493,22 @@ do {					\
 	return div64_u64(dividend, divisor);
 }
 
+static void perf_event_stop(struct perf_event *event)
+{
+	if (!event->pmu->stop)
+		return event->pmu->disable(event);
+
+	return event->pmu->stop(event);
+}
+
+static int perf_event_start(struct perf_event *event)
+{
+	if (!event->pmu->start)
+		return event->pmu->enable(event);
+
+	return event->pmu->start(event);
+}
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -1513,9 +1529,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
 	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
 		perf_disable();
-		event->pmu->disable(event);
+		perf_event_stop(event);
 		atomic64_set(&hwc->period_left, 0);
-		event->pmu->enable(event);
+		perf_event_start(event);
 		perf_enable();
 	}
 }

From 38331f62c20456454eed9ebea2525f072c6f1d2e Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 8 Feb 2010 17:17:01 +0200
Subject: [PATCH 610/640] perf_events, x86: AMD event scheduling

This patch adds correct AMD NorthBridge event scheduling.

NB events are events measuring L3 cache, Hypertransport traffic. They are
identified by an event code >= 0xe0. They measure events on the
Northbride which is shared by all cores on a package. NB events are
counted on a shared set of counters. When a NB event is programmed in a
counter, the data actually comes from a shared counter. Thus, access to
those counters needs to be synchronized.

We implement the synchronization such that no two cores can be measuring
NB events using the same counters. Thus, we maintain a per-NB allocation
table. The available slot is propagated using the event_constraint
structure.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b703957.0702d00a.6bf2.7b7d@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 265 ++++++++++++++++++++++++++++++-
 kernel/perf_event.c              |   5 +
 2 files changed, 267 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9173ea95f918..aa12f36e4711 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -80,6 +80,13 @@ struct event_constraint {
 	int	weight;
 };
 
+struct amd_nb {
+	int nb_id;  /* NorthBridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -92,6 +99,7 @@ struct cpu_hw_events {
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct amd_nb		*amd_nb;
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
@@ -153,6 +161,8 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
+static raw_spinlock_t amd_nb_lock;
+
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
@@ -802,7 +812,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
+#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
 #define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
 #define K7_EVNTSEL_INV_MASK	0x000800000ULL
@@ -2210,6 +2220,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 }
 
 static struct event_constraint unconstrained;
+static struct event_constraint emptyconstraint;
 
 static struct event_constraint bts_constraint =
 	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
@@ -2249,10 +2260,146 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	return &unconstrained;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+	return (hwc->config & 0xe0) == 0xe0;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	int i;
+
+	/*
+	 * only care about NB events
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return;
+
+	/*
+	 * need to scan whole list because event may not have
+	 * been assigned during scheduling
+	 *
+	 * no race condition possible because event can only
+	 * be removed on one CPU at a time AND PMU is disabled
+	 * when we come here
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (nb->owners[i] == event) {
+			cmpxchg(nb->owners+i, event, NULL);
+			break;
+		}
+	}
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBride which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling amd_put_event_constraints().
+  *
+  * Non NB events are not impacted by this restriction.
+  */
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	return &unconstrained;
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_events;
+	int i, j, k = -1;
+
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return &unconstrained;
+
+	/*
+	 * detect if already present, if so reuse
+	 *
+	 * cannot merge with actual allocation
+	 * because of possible holes
+	 *
+	 * event can already be present yet not assigned (in hwc->idx)
+	 * because of successive calls to x86_schedule_events() from
+	 * hw_perf_group_sched_in() without hw_perf_enable()
+	 */
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
+
+		/* already present, reuse */
+		if (nb->owners[i] == event)
+			goto done;
+	}
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2465,7 +2612,8 @@ static __initconst struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2589,6 +2737,91 @@ static __init int intel_pmu_init(void)
 	return 0;
 }
 
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+	struct amd_nb *nb;
+	int i;
+
+	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+	if (!nb)
+		return NULL;
+
+	memset(nb, 0, sizeof(*nb));
+	nb->nb_id = nb_id;
+
+	/*
+	 * initialize all possible NB constraints
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		set_bit(i, nb->event_constraints[i].idxmsk);
+		nb->event_constraints[i].weight = 1;
+	}
+	return nb;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+	struct cpu_hw_events *cpu1, *cpu2;
+	struct amd_nb *nb = NULL;
+	int i, nb_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	/*
+	 * function may be called too early in the
+	 * boot process, in which case nb_id is bogus
+	 */
+	nb_id = amd_get_nb_id(cpu);
+	if (nb_id == BAD_APICID)
+		return;
+
+	cpu1 = &per_cpu(cpu_hw_events, cpu);
+	cpu1->amd_nb = NULL;
+
+	raw_spin_lock(&amd_nb_lock);
+
+	for_each_online_cpu(i) {
+		cpu2 = &per_cpu(cpu_hw_events, i);
+		nb = cpu2->amd_nb;
+		if (!nb)
+			continue;
+		if (nb->nb_id == nb_id)
+			goto found;
+	}
+
+	nb = amd_alloc_nb(cpu, nb_id);
+	if (!nb) {
+		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
+		raw_spin_unlock(&amd_nb_lock);
+		return;
+	}
+found:
+	nb->refcnt++;
+	cpu1->amd_nb = nb;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	if (--cpuhw->amd_nb->refcnt == 0)
+		kfree(cpuhw->amd_nb);
+
+	cpuhw->amd_nb = NULL;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -2601,6 +2834,11 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
+	/*
+	 * explicitly initialize the boot cpu, other cpus will get
+	 * the cpu hotplug callbacks from smp_init()
+	 */
+	amd_pmu_cpu_online(smp_processor_id());
 	return 0;
 }
 
@@ -2934,4 +3172,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 void hw_perf_event_setup_online(int cpu)
 {
 	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_online(cpu);
+		break;
+	default:
+		return;
+	}
+}
+
+void hw_perf_event_setup_offline(int cpu)
+{
+	init_debug_store_on_cpu(cpu);
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_pmu_cpu_offline(cpu);
+		break;
+	default:
+		return;
+	}
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 74c60021cdbc..fb4e56eb58f4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,6 +98,7 @@ void __weak hw_perf_enable(void)		{ barrier(); }
 
 void __weak hw_perf_event_setup(int cpu)	{ barrier(); }
 void __weak hw_perf_event_setup_online(int cpu)	{ barrier(); }
+void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
@@ -5462,6 +5463,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 		perf_event_exit_cpu(cpu);
 		break;
 
+	case CPU_DEAD:
+		hw_perf_event_setup_offline(cpu);
+		break;
+
 	default:
 		break;
 	}

From 6e37738a2fac964583debe91099bc3248554f6e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 11 Feb 2010 13:21:58 +0100
Subject: [PATCH 611/640] perf_events: Simplify code by removing cpu argument
 to hw_perf_group_sched_in()

Since the cpu argument to hw_perf_group_sched_in() is always
smp_processor_id(), simplify the code a little by removing this argument
and using the current cpu where needed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1265890918.5396.3.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c | 10 +++----
 arch/sparc/kernel/perf_event.c   | 10 +++----
 arch/x86/kernel/cpu/perf_event.c | 18 ++++++-------
 include/linux/perf_event.h       |  2 +-
 kernel/perf_event.c              | 45 +++++++++++++-------------------
 5 files changed, 38 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 1eb85fbf53a5..b6cf8f1f4d35 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -718,10 +718,10 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event, int cpu)
+static void event_sched_in(struct perf_event *event)
 {
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 	if (is_software_event(event))
 		event->pmu->enable(event);
@@ -735,7 +735,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
  */
 int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
 	struct cpu_hw_events *cpuhw;
 	long i, n, n0;
@@ -766,10 +766,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 	cpuctx->active_oncpu += n;
 	n = 1;
-	event_sched_in(group_leader, cpu);
+	event_sched_in(group_leader);
 	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
 		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub, cpu);
+			event_sched_in(sub);
 			++n;
 		}
 	}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index e856456ec02f..9f2b2bac8b2b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -980,10 +980,10 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event, int cpu)
+static void event_sched_in(struct perf_event *event)
 {
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 	if (is_software_event(event))
 		event->pmu->enable(event);
@@ -991,7 +991,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
 
 int hw_perf_group_sched_in(struct perf_event *group_leader,
 			   struct perf_cpu_context *cpuctx,
-			   struct perf_event_context *ctx, int cpu)
+			   struct perf_event_context *ctx)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *sub;
@@ -1015,10 +1015,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 
 	cpuctx->active_oncpu += n;
 	n = 1;
-	event_sched_in(group_leader, cpu);
+	event_sched_in(group_leader);
 	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
 		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub, cpu);
+			event_sched_in(sub);
 			n++;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index aa12f36e4711..ad096562d694 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2403,12 +2403,12 @@ done:
 }
 
 static int x86_event_sched_in(struct perf_event *event,
-			  struct perf_cpu_context *cpuctx, int cpu)
+			  struct perf_cpu_context *cpuctx)
 {
 	int ret = 0;
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 
 	if (!is_x86_event(event))
@@ -2424,7 +2424,7 @@ static int x86_event_sched_in(struct perf_event *event,
 }
 
 static void x86_event_sched_out(struct perf_event *event,
-			    struct perf_cpu_context *cpuctx, int cpu)
+			    struct perf_cpu_context *cpuctx)
 {
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	event->oncpu = -1;
@@ -2452,9 +2452,9 @@ static void x86_event_sched_out(struct perf_event *event,
  */
 int hw_perf_group_sched_in(struct perf_event *leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
-	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *sub;
 	int assign[X86_PMC_IDX_MAX];
 	int n0, n1, ret;
@@ -2468,14 +2468,14 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	if (ret)
 		return ret;
 
-	ret = x86_event_sched_in(leader, cpuctx, cpu);
+	ret = x86_event_sched_in(leader, cpuctx);
 	if (ret)
 		return ret;
 
 	n1 = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		if (sub->state > PERF_EVENT_STATE_OFF) {
-			ret = x86_event_sched_in(sub, cpuctx, cpu);
+			ret = x86_event_sched_in(sub, cpuctx);
 			if (ret)
 				goto undo;
 			++n1;
@@ -2500,11 +2500,11 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	 */
 	return 1;
 undo:
-	x86_event_sched_out(leader, cpuctx, cpu);
+	x86_event_sched_out(leader, cpuctx);
 	n0  = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			x86_event_sched_out(sub, cpuctx, cpu);
+			x86_event_sched_out(sub, cpuctx);
 			if (++n0 == n1)
 				break;
 		}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b08dfdad08cb..d0e072c5b58a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -772,7 +772,7 @@ extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu);
+	       struct perf_event_context *ctx);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fb4e56eb58f4..05b6c6b825e3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -103,7 +103,7 @@ void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
 	return 0;
 }
@@ -633,14 +633,13 @@ void perf_event_disable(struct perf_event *event)
 static int
 event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
-		 struct perf_event_context *ctx,
-		 int cpu)
+		 struct perf_event_context *ctx)
 {
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	event->oncpu = smp_processor_id();
 	/*
 	 * The new state must be visible before we turn it on in the hardware:
 	 */
@@ -667,8 +666,7 @@ event_sched_in(struct perf_event *event,
 static int
 group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx,
-	       int cpu)
+	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group;
 	int ret;
@@ -676,18 +674,18 @@ group_sched_in(struct perf_event *group_event,
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu);
+	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
 	if (ret)
 		return ret < 0 ? ret : 0;
 
-	if (event_sched_in(group_event, cpuctx, ctx, cpu))
+	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (event_sched_in(event, cpuctx, ctx, cpu)) {
+		if (event_sched_in(event, cpuctx, ctx)) {
 			partial_group = event;
 			goto group_error;
 		}
@@ -761,7 +759,6 @@ static void __perf_install_in_context(void *info)
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
-	int cpu = smp_processor_id();
 	int err;
 
 	/*
@@ -808,7 +805,7 @@ static void __perf_install_in_context(void *info)
 	if (!group_can_go_on(event, cpuctx, 1))
 		err = -EEXIST;
 	else
-		err = event_sched_in(event, cpuctx, ctx, cpu);
+		err = event_sched_in(event, cpuctx, ctx);
 
 	if (err) {
 		/*
@@ -950,11 +947,9 @@ static void __perf_event_enable(void *info)
 	} else {
 		perf_disable();
 		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx,
-					     smp_processor_id());
+			err = group_sched_in(event, cpuctx, ctx);
 		else
-			err = event_sched_in(event, cpuctx, ctx,
-					       smp_processor_id());
+			err = event_sched_in(event, cpuctx, ctx);
 		perf_enable();
 	}
 
@@ -1281,19 +1276,18 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
 
 static void
 ctx_pinned_sched_in(struct perf_event_context *ctx,
-		    struct perf_cpu_context *cpuctx,
-		    int cpu)
+		    struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
 
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, 1))
-			group_sched_in(event, cpuctx, ctx, cpu);
+			group_sched_in(event, cpuctx, ctx);
 
 		/*
 		 * If this pinned group hasn't been scheduled,
@@ -1308,8 +1302,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
 
 static void
 ctx_flexible_sched_in(struct perf_event_context *ctx,
-		      struct perf_cpu_context *cpuctx,
-		      int cpu)
+		      struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
 	int can_add_hw = 1;
@@ -1322,11 +1315,11 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
 		 */
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, can_add_hw))
-			if (group_sched_in(event, cpuctx, ctx, cpu))
+			if (group_sched_in(event, cpuctx, ctx))
 				can_add_hw = 0;
 	}
 }
@@ -1336,8 +1329,6 @@ ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
 	     enum event_type_t event_type)
 {
-	int cpu = smp_processor_id();
-
 	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	if (likely(!ctx->nr_events))
@@ -1352,11 +1343,11 @@ ctx_sched_in(struct perf_event_context *ctx,
 	 * in order to give them the best chance of going on.
 	 */
 	if (event_type & EVENT_PINNED)
-		ctx_pinned_sched_in(ctx, cpuctx, cpu);
+		ctx_pinned_sched_in(ctx, cpuctx);
 
 	/* Then walk through the lower prio flexible groups */
 	if (event_type & EVENT_FLEXIBLE)
-		ctx_flexible_sched_in(ctx, cpuctx, cpu);
+		ctx_flexible_sched_in(ctx, cpuctx);
 
 	perf_enable();
  out:

From 6667661df4bc76083edf1e08831c20f64429709d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Feb 2010 16:10:48 +0100
Subject: [PATCH 612/640] perf_events, x86: Remove superflous MSR writes

We re-program the event control register every time we reset the count,
this appears to be superflous, hence remove it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ad096562d694..dd09ccc867d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2009,9 +2009,6 @@ static int intel_pmu_save_and_restart(struct perf_event *event)
 	x86_perf_event_update(event, hwc, idx);
 	ret = x86_perf_event_set_period(event, hwc, idx);
 
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		intel_pmu_enable_event(hwc, idx);
-
 	return ret;
 }
 

From 18b4a4d59e97e7ff13ee84b5bec79f3fc70a9f0a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 10 Feb 2010 10:03:34 +0100
Subject: [PATCH 613/640] oprofile: remove tracing build dependency

The commit

 1155de4 ring-buffer: Make it generally available

already made ring-buffer available without the TRACING option
enabled. This patch removes the TRACING dependency from oprofile.

Fixes also oprofile configuration on ia64.

The patch also applies to the 2.6.32-stable kernel.

Reported-by: Tony Jones <tonyj@suse.de>
Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..25e69f727a2e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -6,8 +6,6 @@ config OPROFILE
 	tristate "OProfile system profiling (EXPERIMENTAL)"
 	depends on PROFILING
 	depends on HAVE_OPROFILE
-	depends on TRACING_SUPPORT
-	select TRACING
 	select RING_BUFFER
 	select RING_BUFFER_ALLOW_SWAP
 	help

From b309a294e5b24692d0f7ea1defa168074cea619e Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 26 Feb 2010 15:01:23 +0100
Subject: [PATCH 614/640] oprofile: remove EXPERIMENTAL from the config option
 description

OProfile is already used for a long time and no longer experimental.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/Kconfig | 2 +-
 init/Kconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 25e69f727a2e..d67787241813 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
+	tristate "OProfile system profiling"
 	depends on PROFILING
 	depends on HAVE_OPROFILE
 	select RING_BUFFER
diff --git a/init/Kconfig b/init/Kconfig
index d95ca7cd5d45..b7c583956107 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1112,7 +1112,7 @@ config MMAP_ALLOW_UNINITIALIZED
 	  See Documentation/nommu-mmap.txt for more information.
 
 config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
+	bool "Profiling support"
 	help
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.

From 013cfc50672bbb638796545231683231647edb07 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 28 Jan 2010 18:05:26 +0100
Subject: [PATCH 615/640] oprofile/x86: remove OPROFILE_IBS config option

OProfile support for IBS is now for several versions in the
kernel. The feature is stable now and the code can be activated
permanently.

As a side effect IBS now works also on nosmp configs.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/Kconfig                     | 14 --------------
 arch/x86/oprofile/op_model_amd.c | 31 +------------------------------
 2 files changed, 1 insertion(+), 44 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index d67787241813..06a13729c8df 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -15,20 +15,6 @@ config OPROFILE
 
 	  If unsure, say N.
 
-config OPROFILE_IBS
-	bool "OProfile AMD IBS support (EXPERIMENTAL)"
-	default n
-	depends on OPROFILE && SMP && X86
-	help
-          Instruction-Based Sampling (IBS) is a new profiling
-          technique that provides rich, precise program performance
-          information. IBS is introduced by AMD Family10h processors
-          (AMD Opteron Quad-Core processor "Barcelona") to overcome
-          the limitations of conventional performance counter
-          sampling.
-
-	  If unsure, say N.
-
 config OPROFILE_EVENT_MULTIPLEX
 	bool "OProfile multiplexing support (EXPERIMENTAL)"
 	default n
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..2b9c68d868ed 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,7 @@
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/nmi.h>
+#include <asm/apic.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -43,8 +44,6 @@
 
 static unsigned long reset_value[NUM_VIRT_COUNTERS];
 
-#ifdef CONFIG_OPROFILE_IBS
-
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN		(1ULL<<57)
 #define IBS_FETCH_VAL			(1ULL<<49)
@@ -72,8 +71,6 @@ struct op_ibs_config {
 
 static struct op_ibs_config ibs_config;
 
-#endif
-
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
 static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
@@ -185,8 +182,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	}
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
@@ -272,15 +267,6 @@ static void op_amd_stop_ibs(void)
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-#else
-
-static inline void op_amd_handle_ibs(struct pt_regs * const regs,
-				    struct op_msrs const * const msrs) { }
-static inline void op_amd_start_ibs(void) { }
-static inline void op_amd_stop_ibs(void) { }
-
-#endif
-
 static int op_amd_check_ctrs(struct pt_regs * const regs,
 			     struct op_msrs const * const msrs)
 {
@@ -355,8 +341,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	}
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
 static u8 ibs_eilvt_off;
 
 static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -507,19 +491,6 @@ static void op_amd_exit(void)
 	ibs_exit();
 }
 
-#else
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-	return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#endif /* CONFIG_OPROFILE_IBS */
-
 struct op_x86_model_spec op_amd_spec = {
 	.num_counters		= NUM_COUNTERS,
 	.num_controls		= NUM_CONTROLS,

From 89baaaa98a10cad5cc8516c7208b02d9fc711890 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 28 Jan 2010 16:50:45 +0100
Subject: [PATCH 616/640] oprofile/x86: remove node check in AMD IBS
 initialization

Standard AMD systems have the same number of nodes as there are
northbridge devices. However, there may kernel configurations
(especially for 32 bit) or system setups exist, where the node number
is different or it can not be detected properly. Thus the check is not
reliable and may fail though IBS setup was fine. For this reason it is
better to remove the check.

Cc: stable <stable@kernel.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2b9c68d868ed..4eb30715b1d5 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -389,16 +389,6 @@ static int init_ibs_nmi(void)
 		return 1;
 	}
 
-#ifdef CONFIG_NUMA
-	/* Sanity check */
-	/* Works only for 64bit with proper numa implementation. */
-	if (nodes != num_possible_nodes()) {
-		printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
-			"found: %d, expected %d",
-			nodes, num_possible_nodes());
-		return 1;
-	}
-#endif
 	return 0;
 }
 

From 64683da6643e8c6c93f1f99548399b08c029fd13 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 4 Feb 2010 10:57:23 +0100
Subject: [PATCH 617/640] oprofile/x86: implement IBS cpuid feature detection

This patch adds IBS feature detection using cpuid flags. An IBS
capability mask is introduced to test for certain IBS features. The
bit mask is the same as for IBS cpuid feature flags (Fn8000_001B_EAX),
but bit 0 is used to indicate the existence of IBS.

The patch also changes the handling of the IbsOpCntCtl bit (periodic
op counter count control). The oprofilefs file for this feature
(ibs_op/dispatched_ops) will be only exposed if the feature is
available, also the default for the bit is set to count clock cycles.

In general, the userland can detect the availability of a feature by
checking for the corresponding file in oprofilefs. If it exists, the
feature also exists. This may lead to a dynamic file layout depending
on the cpu type with that the userland has to deal with. Current
opcontrol is compatible.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 80 +++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 17 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4eb30715b1d5..6557683c190e 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -23,6 +23,8 @@
 #include <asm/msr.h>
 #include <asm/nmi.h>
 #include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -58,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
 #define IBS_FETCH_SIZE			6
 #define IBS_OP_SIZE			12
 
-static int has_ibs;	/* AMD Family10h and later */
+static u32 ibs_caps;
 
 struct op_ibs_config {
 	unsigned long op_enabled;
@@ -71,6 +73,40 @@ struct op_ibs_config {
 
 static struct op_ibs_config ibs_config;
 
+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES      0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL			(1LL<<0)
+#define IBS_CAPS_OPCNT			(1LL<<4)
+
+static u32 get_ibs_caps(void)
+{
+	u32 ibs_caps;
+	unsigned int max_level;
+
+	if (!boot_cpu_has(X86_FEATURE_IBS))
+		return 0;
+
+	/* check IBS cpuid feature flags */
+	max_level = cpuid_eax(0x80000000);
+	if (max_level < IBS_CPUID_FEATURES)
+		return IBS_CAPS_AVAIL;
+
+	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
+	if (!(ibs_caps & IBS_CAPS_AVAIL))
+		/* cpuid flags not valid */
+		return IBS_CAPS_AVAIL;
+
+	return ibs_caps;
+}
+
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
 static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
@@ -189,7 +225,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	u64 val, ctl;
 	struct op_entry entry;
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	if (ibs_config.fetch_enabled) {
@@ -241,16 +277,21 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 static inline void op_amd_start_ibs(void)
 {
 	u64 val;
-	if (has_ibs && ibs_config.fetch_enabled) {
+
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled) {
 		val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
 		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
 		val |= IBS_FETCH_ENABLE;
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
 	}
 
-	if (has_ibs && ibs_config.op_enabled) {
+	if (ibs_config.op_enabled) {
 		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
-		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
+			val |= IBS_OP_CNT_CTL;
 		val |= IBS_OP_ENABLE;
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
@@ -258,11 +299,14 @@ static inline void op_amd_start_ibs(void)
 
 static void op_amd_stop_ibs(void)
 {
-	if (has_ibs && ibs_config.fetch_enabled)
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
 
-	if (has_ibs && ibs_config.op_enabled)
+	if (ibs_config.op_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
@@ -395,29 +439,30 @@ static int init_ibs_nmi(void)
 /* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
-	if (has_ibs)
+	if (ibs_caps)
 		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
 }
 
 /* initialize the APIC for the IBS interrupts if available */
 static void ibs_init(void)
 {
-	has_ibs = boot_cpu_has(X86_FEATURE_IBS);
+	ibs_caps = get_ibs_caps();
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	if (init_ibs_nmi()) {
-		has_ibs = 0;
+		ibs_caps = 0;
 		return;
 	}
 
-	printk(KERN_INFO "oprofile: AMD IBS detected\n");
+	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
+	       (unsigned)ibs_caps);
 }
 
 static void ibs_exit(void)
 {
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	clear_ibs_nmi();
@@ -437,7 +482,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	if (ret)
 		return ret;
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return ret;
 
 	/* model specific files */
@@ -447,7 +492,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	ibs_config.fetch_enabled = 0;
 	ibs_config.max_cnt_op = 250000;
 	ibs_config.op_enabled = 0;
-	ibs_config.dispatched_ops = 1;
+	ibs_config.dispatched_ops = 0;
 
 	dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
 	oprofilefs_create_ulong(sb, dir, "enable",
@@ -462,8 +507,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 				&ibs_config.op_enabled);
 	oprofilefs_create_ulong(sb, dir, "max_count",
 				&ibs_config.max_cnt_op);
-	oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-				&ibs_config.dispatched_ops);
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+					&ibs_config.dispatched_ops);
 
 	return 0;
 }

From f125be1469303f7b9324447f251d74a0da24952f Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Mon, 18 Jan 2010 11:25:45 -0600
Subject: [PATCH 618/640] oprofile/x86: implement lsfr pseudo-random number
 generator for IBS

This patch implements a linear feedback shift register (LFSR) for
pseudo-random number generation for IBS.

For IBS measurements it would be good to minimize memory traffic in
the interrupt handler since every access pollutes the data
caches. Computing a maximal period LFSR just needs shifts and ORs.

The LFSR method is good enough to randomize the ops at low
overhead. 16 pseudo-random bits are enough for the implementation and
it doesn't matter that the pattern repeats with a fairly short
cycle. It only needs to break up (hard) periodic sampling behavior.

The logic was designed by Paul Drongowski.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 6557683c190e..97c84ebe3f24 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -218,6 +218,29 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	}
 }
 
+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ *
+ *                       16   14   13    11
+ * Feedback polynomial = X  + X  + X  +  X  + 1
+ */
+static unsigned int lfsr_random(void)
+{
+	static unsigned int lfsr_value = 0xF00D;
+	unsigned int bit;
+
+	/* Compute next bit to shift in */
+	bit = ((lfsr_value >> 0) ^
+	       (lfsr_value >> 2) ^
+	       (lfsr_value >> 3) ^
+	       (lfsr_value >> 5)) & 0x0001;
+
+	/* Advance to next register value */
+	lfsr_value = (lfsr_value >> 1) | (bit << 15);
+
+	return lfsr_value;
+}
+
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)

From ba52078e1917c5116c0802298d88ad0e54a6728b Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 23 Feb 2010 15:46:49 +0100
Subject: [PATCH 619/640] oprofile/x86: implement randomization for IBS
 periodic op counter

IBS selects an op (execution operation) for sampling by counting
either cycles or dispatched ops. Better statistical samples can be
produced by adding a software generated random offset to the periodic
op counter value with each sample.

This patch adds software randomization to the IBS periodic op
counter. The lower 12 bits of the 20 bit counter are
randomized. IbsOpCurCnt is initialized with a 12 bit random value.

There is a work around if the hw can not write to IbsOpCurCnt. Then
the lower 8 bits of the 16 bit IbsOpMaxCnt [15:0] value are randomized
in the range of -128 to +127 by adding/subtracting an offset to the
maximum count (IbsOpMaxCnt).

The linear feedback shift register (LFSR) algorithm is used for
pseudo-random number generation to have low impact to the memory
system.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 69 +++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 97c84ebe3f24..a9d194734a8e 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -52,7 +52,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
 #define IBS_FETCH_ENABLE		(1ULL<<48)
 #define IBS_FETCH_CNT_MASK		0xFFFF0000ULL
 
-/*IbsOpCtl bits */
+/* IbsOpCtl bits */
 #define IBS_OP_CNT_CTL			(1ULL<<19)
 #define IBS_OP_VAL			(1ULL<<18)
 #define IBS_OP_ENABLE			(1ULL<<17)
@@ -72,6 +72,7 @@ struct op_ibs_config {
 };
 
 static struct op_ibs_config ibs_config;
+static u64 ibs_op_ctl;
 
 /*
  * IBS cpuid feature detection
@@ -84,8 +85,16 @@ static struct op_ibs_config ibs_config;
  * bit 0 is used to indicate the existence of IBS.
  */
 #define IBS_CAPS_AVAIL			(1LL<<0)
+#define IBS_CAPS_RDWROPCNT		(1LL<<3)
 #define IBS_CAPS_OPCNT			(1LL<<4)
 
+/*
+ * IBS randomization macros
+ */
+#define IBS_RANDOM_BITS			12
+#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
+#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
+
 static u32 get_ibs_caps(void)
 {
 	u32 ibs_caps;
@@ -241,6 +250,38 @@ static unsigned int lfsr_random(void)
 	return lfsr_value;
 }
 
+/*
+ * IBS software randomization
+ *
+ * The IBS periodic op counter is randomized in software. The lower 12
+ * bits of the 20 bit counter are randomized. IbsOpCurCnt is
+ * initialized with a 12 bit random value.
+ */
+static inline u64 op_amd_randomize_ibs_op(u64 val)
+{
+	unsigned int random = lfsr_random();
+
+	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
+		/*
+		 * Work around if the hw can not write to IbsOpCurCnt
+		 *
+		 * Randomize the lower 8 bits of the 16 bit
+		 * IbsOpMaxCnt [15:0] value in the range of -128 to
+		 * +127 by adding/subtracting an offset to the
+		 * maximum count (IbsOpMaxCnt).
+		 *
+		 * To avoid over or underflows and protect upper bits
+		 * starting at bit 16, the initial value for
+		 * IbsOpMaxCnt must fit in the range from 0x0081 to
+		 * 0xff80.
+		 */
+		val += (s8)(random >> 4);
+	else
+		val |= (u64)(random & IBS_RANDOM_MASK) << 32;
+
+	return val;
+}
+
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
@@ -290,8 +331,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
-			ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
-			ctl |= IBS_OP_ENABLE;
+			ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
 			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
 		}
 	}
@@ -312,10 +352,27 @@ static inline void op_amd_start_ibs(void)
 	}
 
 	if (ibs_config.op_enabled) {
-		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
+		ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+			/*
+			 * IbsOpCurCnt not supported.  See
+			 * op_amd_randomize_ibs_op() for details.
+			 */
+			ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+		} else {
+			/*
+			 * The start value is randomized with a
+			 * positive offset, we need to compensate it
+			 * with the half of the randomized range. Also
+			 * avoid underflows.
+			 */
+			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
+					 0xFFFFULL);
+		}
 		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
-			val |= IBS_OP_CNT_CTL;
-		val |= IBS_OP_ENABLE;
+			ibs_op_ctl |= IBS_OP_CNT_CTL;
+		ibs_op_ctl |= IBS_OP_ENABLE;
+		val = op_amd_randomize_ibs_op(ibs_op_ctl);
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
 }

From 98a2e73a0690b3610f049a64154d8145e5771713 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Tue, 23 Feb 2010 18:14:58 +0100
Subject: [PATCH 620/640] oprofile/x86: warn user if a counter is already
 active

This patch generates a warning if a counter is already active.

Implemented for AMD and P6 models. P4 is not supported.

Cc: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Cc: Shashi Belur <shashi-kiran.belur@hp.com>
Cc: Tony Jones <tonyj@suse.de>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c  | 11 ++++++++++-
 arch/x86/oprofile/op_model_ppro.c | 11 ++++++++++-
 arch/x86/oprofile/op_x86_model.h  | 11 +++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index a9d194734a8e..ef9d735dea35 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -194,9 +194,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..c344525ebb55 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -82,9 +82,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0; i < num_counters; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..59fa2bdb0da3 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,17 @@ struct op_x86_model_spec {
 
 struct op_counter_config;
 
+static inline void op_x86_warn_in_use(int counter)
+{
+	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
+		   counter, smp_processor_id());
+}
+
+static inline void op_x86_warn_reserved(int counter)
+{
+	pr_warning("oprofile: counter #%d is already reserved\n", counter);
+}
+
 extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 			   struct op_counter_config *counter_config);
 extern int op_x86_phys_to_virt(int phys);

From 8588d1067147e14d1dd521fbadd1d2564f8cc794 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Date: Tue, 23 Feb 2010 18:14:58 +0100
Subject: [PATCH 621/640] oprofile/x86: add comment to counter-in-use warning

Currently, oprofile fails silently on platforms where a non-OS entity
such as the system firmware "enables" and uses a performance
counter. There is a warning in the code for this case.

The warning indicates an already running counter. If oprofile doesn't
collect data, then try using a different performance counter on your
platform to monitor the desired event. Delete the counter from the
desired event by editing the

 /usr/share/oprofile/<cpu_type>/<cpu>/events

file. If the event cannot be monitored by any other counter, contact
your hardware or BIOS vendor.

Cc: Shashi Belur <shashi-kiran.belur@hp.com>
Cc: Tony Jones <tonyj@suse.de>
Signed-off-by: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_x86_model.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 59fa2bdb0da3..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -59,6 +59,15 @@ struct op_counter_config;
 
 static inline void op_x86_warn_in_use(int counter)
 {
+	/*
+	 * The warning indicates an already running counter. If
+	 * oprofile doesn't collect data, then try using a different
+	 * performance counter on your platform to monitor the desired
+	 * event. Delete counter #%d from the desired event by editing
+	 * the /usr/share/oprofile/%s/<cpu>/events file. If the event
+	 * cannot be monitored by any other counter, contact your
+	 * hardware or BIOS vendor.
+	 */
 	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
 		   counter, smp_processor_id());
 }

From 68dc819ce829f7e7977a56524e710473bdb55115 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 25 Feb 2010 19:16:46 +0100
Subject: [PATCH 622/640] oprofile/x86: fix perfctr nmi reservation for
 mulitplexing

Multiple virtual counters share one physical counter. The reservation
of virtual counters fails due to duplicate allocation of the same
counter. The counters are already reserved. Thus, virtual counter
reservation may removed at all. This also makes the code easier.

Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/nmi_int.c      | 11 ++++++-----
 arch/x86/oprofile/op_model_amd.c | 19 -------------------
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3347f696edc7..7170d1e29896 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 		if (counter_config[i].enabled) {
 			multiplex[i].saved = -(u64)counter_config[i].count;
 		} else {
-			multiplex[i].addr  = 0;
 			multiplex[i].saved = 0;
 		}
 	}
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 
 static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;
 
 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			rdmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }
 
 static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;
 
 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			wrmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }
 
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index ef9d735dea35..2aab018a7a56 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -118,19 +118,6 @@ static u32 get_ibs_caps(void)
 
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
-static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
-{
-	int i;
-
-	for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
-		int hw_counter = op_x86_virt_to_phys(i);
-		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
-			msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
-		else
-			msrs->multiplex[i].addr = 0;
-	}
-}
-
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 			       struct op_msrs const * const msrs)
 {
@@ -149,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	}
 }
 
-#else
-
-static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
-
 #endif
 
 /* functions for op_amd_spec */
@@ -174,8 +157,6 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 		else
 			msrs->controls[i].addr = 0;
 	}
-
-	op_mux_fill_in_addresses(msrs);
 }
 
 static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,

From c17c8fbf349482e89b57d1b800e83e9f4cf40c47 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 25 Feb 2010 20:20:25 +0100
Subject: [PATCH 623/640] oprofile/x86: use kzalloc() instead of kmalloc()

Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/nmi_int.c       | 6 +++---
 arch/x86/oprofile/op_model_amd.c  | 4 ----
 arch/x86/oprofile/op_model_p4.c   | 6 ------
 arch/x86/oprofile/op_model_ppro.c | 6 +-----
 4 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 7170d1e29896..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)
 
 	for_each_possible_cpu(i) {
 		per_cpu(cpu_msrs, i).multiplex =
-			kmalloc(multiplex_size, GFP_KERNEL);
+			kzalloc(multiplex_size, GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).multiplex)
 			return 0;
 	}
@@ -304,11 +304,11 @@ static int allocate_msrs(void)
 
 	int i;
 	for_each_possible_cpu(i) {
-		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
+		per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).counters)
 			return 0;
-		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
+		per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).controls)
 			return 0;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2aab018a7a56..f4ebc4596da8 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -147,15 +147,11 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < NUM_COUNTERS; i++) {
 		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}
 
 	for (i = 0; i < NUM_CONTROLS; i++) {
 		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
 }
 
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	setup_num_counters();
 	stag = get_stagger();
 
-	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i)
-		msrs->counters[i].addr = 0;
-	for (i = 0; i < num_controls; ++i)
-		msrs->controls[i].addr = 0;
-
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index c344525ebb55..5d1727ba409e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < num_counters; i++) {
 		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}
 
 	for (i = 0; i < num_counters; i++) {
 		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
 }
 
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	int i;
 
 	if (!reset_value) {
-		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
+		reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
 					GFP_ATOMIC);
 		if (!reset_value)
 			return;

From cfc9c0b450176a077205ef39092f0dc1a04e020a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 26 Feb 2010 13:45:24 +0100
Subject: [PATCH 624/640] oprofile/x86: fix msr access to reserved counters

During switching virtual counters there is access to perfctr msrs. If
the counter is not available this fails due to an invalid
address. This patch fixes this.

Cc: stable@kernel.org
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_amd.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f4ebc4596da8..6a58256dce9f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -127,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
+		if (!reset_value[virt])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
 		val &= model->reserved;
@@ -163,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* setup reset_value */
 	for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
-		if (counter_config[i].enabled)
+		if (counter_config[i].enabled
+		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
 			reset_value[i] = counter_config[i].count;
 		else
 			reset_value[i] = 0;
@@ -197,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
-			continue;
-		if (!msrs->counters[i].addr)
+		if (!reset_value[virt])
 			continue;
 
 		/* setup counter registers */

From 48fb4fdd6b667ebeccbc6cde0a8a5a148d5c6b68 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 26 Feb 2010 11:23:14 -0300
Subject: [PATCH 625/640] perf annotate: Handle samples not at objdump output
 addr boundaries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this patch we get this for need_resched:

[root@mica ~]# perf annotate need_resched

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
         :
         :
         :      Disassembly of section .text:
         :
         :      ffffffff810095ed <need_resched>:
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095ed:       55                      push   %rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    0.00 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
         :
         :      static inline struct thread_info *current_thread_info(void)
         :      {
         :              struct thread_info *ti;
         :              ti = (void *)(percpu_read_stable(kernel_stack) +
    0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
    0.00 :      ffffffff810095fa:       00 00
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    0.00 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
    0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
         :      }
    0.00 :      ffffffff8100960b:       c9                      leaveq
    0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
    0.00 :      ffffffff8100960e:       0f 95 c0                setne  %al
    0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
         :      Disassembly of section .vsyscall_0:
         :      Disassembly of section .vsyscall_fn:
         :      Disassembly of section .vsyscall_1:
         :      Disassembly of section .vsyscall_2:
         :      Disassembly of section .init.text:
         :      Disassembly of section .altinstr_replacement:
         :      Disassembly of section .exit.text:
[root@mica ~]#

But from the 'perf report' result we know that there are hits
for need_resched on a 4 way machine mostly doing nothing, so
after adding code to show what is in each hist offset and
collapsing IP hits for what happens between objdump lines we
get, for the same perf.data file:

[root@mica ~]# perf annotate -v need_resched

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
         :
         :
         :      Disassembly of section .text:
         :
         :      ffffffff810095ed <need_resched>:
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095ed:       55                      push   %rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
   52.78 :      ffffffff810095ee:       be 03 00 00 00          mov    $0x3,%esi
         :
         :      static inline struct thread_info *current_thread_info(void)
         :      {
         :              struct thread_info *ti;
         :              ti = (void *)(percpu_read_stable(kernel_stack) +
    0.00 :      ffffffff810095f3:       65 48 8b 3c 25 48 b5    mov    %gs:0xb548,%rdi
    0.00 :      ffffffff810095fa:       00 00
         :              return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
         :      }
         :
         :      static inline int need_resched(void)
         :      {
    0.00 :      ffffffff810095fc:       48 89 e5                mov    %rsp,%rbp
         :              return unlikely(test_thread_flag(TIF_NEED_RESCHED));
    9.72 :      ffffffff810095ff:       48 81 ef d8 1f 00 00    sub    $0x1fd8,%rdi
    0.00 :      ffffffff81009606:       e8 9d ff ff ff          callq  ffffffff810095a8 <test_ti_thread_flag>
         :      }
    0.00 :      ffffffff8100960b:       c9                      leaveq
    0.00 :      ffffffff8100960c:       85 c0                   test   %eax,%eax
   37.50 :      ffffffff8100960e:       0f 95 c0                setne  %al
    0.00 :      ffffffff81009611:       0f b6 c0                movzbl %al,%eax
         :      Disassembly of section .vsyscall_0:
         :      Disassembly of section .vsyscall_fn:
         :      Disassembly of section .vsyscall_1:
         :      Disassembly of section .vsyscall_2:
         :      Disassembly of section .init.text:
         :      Disassembly of section .altinstr_replacement:
         :      Disassembly of section .exit.text:
[root@mica ~]#

And now 'perf annotate -v', verbose mode, will show the hits per
precise IP, so that one can make sense of the attribution to
each objdumop line:

[root@mica ~]# perf annotate -v need_resched
Looking at the vmlinux_path (5 entries long)
Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux
for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614

------------------------------------------------
 Percent |      Source code & Disassembly of vmlinux
------------------------------------------------
                ffffffff810095f1: 152
                ffffffff81009603: 28
                ffffffff8100960f: 55
                ffffffff81009610: 53
                          h->sum: 288
<SNIP same annotation>

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Miller <davem@davemloft.net>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c | 133 ++++++++++++++++++++++++++++------
 1 file changed, 111 insertions(+), 22 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index e47dd1587e39..5ec5de995872 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -145,21 +145,58 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	return 0;
 }
 
-static int parse_line(FILE *file, struct hist_entry *he, u64 len)
+struct objdump_line {
+	struct list_head node;
+	s64		 offset;
+	char		 *line;
+};
+
+static struct objdump_line *objdump_line__new(s64 offset, char *line)
+{
+	struct objdump_line *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->offset = offset;
+		self->line = line;
+	}
+
+	return self;
+}
+
+static void objdump_line__free(struct objdump_line *self)
+{
+	free(self->line);
+	free(self);
+}
+
+static void objdump__add_line(struct list_head *head, struct objdump_line *line)
+{
+	list_add_tail(&line->node, head);
+}
+
+static struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
+						      struct objdump_line *pos)
+{
+	list_for_each_entry_continue(pos, head, node)
+		if (pos->offset >= 0)
+			return pos;
+
+	return NULL;
+}
+
+static int parse_line(FILE *file, struct hist_entry *he,
+		      struct list_head *head)
 {
 	struct symbol *sym = he->sym;
+	struct objdump_line *objdump_line;
 	char *line = NULL, *tmp, *tmp2;
-	static const char *prev_line;
-	static const char *prev_color;
-	unsigned int offset;
 	size_t line_len;
-	u64 start;
-	s64 line_ip;
-	int ret;
+	s64 line_ip, offset = -1;
 	char *c;
 
 	if (getline(&line, &line_len, file) < 0)
 		return -1;
+
 	if (!line)
 		return -1;
 
@@ -168,8 +205,6 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 		*c = 0;
 
 	line_ip = -1;
-	offset = 0;
-	ret = -2;
 
 	/*
 	 * Strip leading spaces:
@@ -190,9 +225,30 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 			line_ip = -1;
 	}
 
-	start = map__rip_2objdump(he->map, sym->start);
-
 	if (line_ip != -1) {
+		u64 start = map__rip_2objdump(he->map, sym->start);
+		offset = line_ip - start;
+	}
+
+	objdump_line = objdump_line__new(offset, line);
+	if (objdump_line == NULL) {
+		free(line);
+		return -1;
+	}
+	objdump__add_line(head, objdump_line);
+
+	return 0;
+}
+
+static int objdump_line__print(struct objdump_line *self,
+			       struct list_head *head,
+			       struct hist_entry *he, u64 len)
+{
+	struct symbol *sym = he->sym;
+	static const char *prev_line;
+	static const char *prev_color;
+
+	if (self->offset != -1) {
 		const char *path = NULL;
 		unsigned int hits = 0;
 		double percent = 0.0;
@@ -200,15 +256,22 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 		struct sym_priv *priv = symbol__priv(sym);
 		struct sym_ext *sym_ext = priv->ext;
 		struct sym_hist *h = priv->hist;
+		s64 offset = self->offset;
+		struct objdump_line *next = objdump__get_next_ip_line(head, self);
 
-		offset = line_ip - start;
-		if (offset < len)
-			hits = h->ip[offset];
+		while (offset < (s64)len &&
+		       (next == NULL || offset < next->offset)) {
+			if (sym_ext) {
+				if (path == NULL)
+					path = sym_ext[offset].path;
+				percent += sym_ext[offset].percent;
+			} else
+				hits += h->ip[offset];
 
-		if (offset < len && sym_ext) {
-			path = sym_ext[offset].path;
-			percent = sym_ext[offset].percent;
-		} else if (h->sum)
+			++offset;
+		}
+
+		if (sym_ext == NULL && h->sum)
 			percent = 100.0 * hits / h->sum;
 
 		color = get_percent_color(percent);
@@ -229,12 +292,12 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 
 		color_fprintf(stdout, color, " %7.2f", percent);
 		printf(" :	");
-		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line);
+		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
 	} else {
-		if (!*line)
+		if (!*self->line)
 			printf("         :\n");
 		else
-			printf("         :	%s\n", line);
+			printf("         :	%s\n", self->line);
 	}
 
 	return 0;
@@ -360,6 +423,20 @@ static void print_summary(const char *filename)
 	}
 }
 
+static void hist_entry__print_hits(struct hist_entry *self)
+{
+	struct symbol *sym = self->sym;
+	struct sym_priv *priv = symbol__priv(sym);
+	struct sym_hist *h = priv->hist;
+	u64 len = sym->end - sym->start, offset;
+
+	for (offset = 0; offset < len; ++offset)
+		if (h->ip[offset] != 0)
+			printf("%*Lx: %Lu\n", BITS_PER_LONG / 2,
+			       sym->start + offset, h->ip[offset]);
+	printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum);
+}
+
 static void annotate_sym(struct hist_entry *he)
 {
 	struct map *map = he->map;
@@ -369,6 +446,8 @@ static void annotate_sym(struct hist_entry *he)
 	u64 len;
 	char command[PATH_MAX*2];
 	FILE *file;
+	LIST_HEAD(head);
+	struct objdump_line *pos, *n;
 
 	if (!filename)
 		return;
@@ -410,11 +489,21 @@ static void annotate_sym(struct hist_entry *he)
 		return;
 
 	while (!feof(file)) {
-		if (parse_line(file, he, len) < 0)
+		if (parse_line(file, he, &head) < 0)
 			break;
 	}
 
 	pclose(file);
+
+	if (verbose)
+		hist_entry__print_hits(he);
+
+	list_for_each_entry_safe(pos, n, &head, node) {
+		objdump_line__print(pos, &head, he, len);
+		list_del(&pos->node);
+		objdump_line__free(pos);
+	}
+
 	if (print_line)
 		free_source_line(he, len);
 }

From f22f54f4491acd987a6c5a92de52b60ca8b58b61 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 26 Feb 2010 12:05:05 +0100
Subject: [PATCH 626/640] perf_events, x86: Split PMU definitions into separate
 files

Split amd,p6,intel into separate files so that we can easily deal with
CONFIG_CPU_SUP_* things, needed to make things build now that perf_event.c
relies on symbols from amd.c

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c       | 1524 +-----------------------
 arch/x86/kernel/cpu/perf_event_amd.c   |  416 +++++++
 arch/x86/kernel/cpu/perf_event_intel.c |  971 +++++++++++++++
 arch/x86/kernel/cpu/perf_event_p6.c    |  157 +++
 4 files changed, 1554 insertions(+), 1514 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_amd.c
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel.c
 create mode 100644 arch/x86/kernel/cpu/perf_event_p6.c

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dd09ccc867d3..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -161,8 +161,6 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
-static raw_spinlock_t amd_nb_lock;
-
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
@@ -170,140 +168,6 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 static int x86_perf_event_set_period(struct perf_event *event,
 			     struct hw_perf_event *hwc, int idx);
 
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
-	return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Event setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_EVENT			0x0000002EULL
-
-static u64 p6_pmu_raw_event(u64 hw_event)
-{
-#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define P6_EVNTSEL_MASK			\
-	(P6_EVNTSEL_EVENT_MASK |	\
-	 P6_EVNTSEL_UNIT_MASK  |	\
-	 P6_EVNTSEL_EDGE_MASK  |	\
-	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_REG_MASK)
-
-	return hw_event & P6_EVNTSEL_MASK;
-}
-
-static struct event_constraint intel_p6_event_constraints[] =
-{
-	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
-	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	INTEL_EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
-	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT_END
-};
-
-/*
- * Intel PerfMon v3. Used on Core2 and later.
- */
-static const u64 intel_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
-};
-
-static struct event_constraint intel_core_event_constraints[] =
-{
-	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
-	EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_core2_event_constraints[] =
-{
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
-	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
-	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
-	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
-	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
-	EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_nehalem_event_constraints[] =
-{
-	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
-	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
-	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
-	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
-	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
-	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
-	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-	EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_westmere_event_constraints[] =
-{
-	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
-	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
-	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-	EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_gen_event_constraints[] =
-{
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
-	EVENT_CONSTRAINT_END
-};
-
-static u64 intel_pmu_event_map(int hw_event)
-{
-	return intel_perfmon_event_map[hw_event];
-}
-
 /*
  * Generalized hw caching related hw_event table, filled
  * in on a per model basis. A value of 0 means
@@ -319,515 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-static __initconst u64 westmere_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
-		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
-		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst u64 nehalem_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
-		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst u64 core2_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst u64 atom_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
-		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
-		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static u64 intel_pmu_raw_event(u64 hw_event)
-{
-#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
-#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
-#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
-#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
-
-#define CORE_EVNTSEL_MASK		\
-	(INTEL_ARCH_EVTSEL_MASK |	\
-	 INTEL_ARCH_UNIT_MASK   |	\
-	 INTEL_ARCH_EDGE_MASK   |	\
-	 INTEL_ARCH_INV_MASK    |	\
-	 INTEL_ARCH_CNT_MASK)
-
-	return hw_event & CORE_EVNTSEL_MASK;
-}
-
-static __initconst u64 amd_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
-		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
-		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(LL  ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
-		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0,
-		[ C(RESULT_MISS)   ] = 0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
-		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
-		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
-	return amd_perfmon_event_map[hw_event];
-}
-
-static u64 amd_pmu_raw_event(u64 hw_event)
-{
-#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
-#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
-#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
-#define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
-
-#define K7_EVNTSEL_MASK			\
-	(K7_EVNTSEL_EVENT_MASK |	\
-	 K7_EVNTSEL_UNIT_MASK  |	\
-	 K7_EVNTSEL_EDGE_MASK  |	\
-	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_REG_MASK)
-
-	return hw_event & K7_EVNTSEL_MASK;
-}
-
 /*
  * Propagate event elapsed time into the generic event.
  * Can only be executed on the CPU where the event is active.
@@ -1079,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
-static void intel_pmu_enable_bts(u64 config)
-{
-	unsigned long debugctlmsr;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr |= X86_DEBUGCTL_TR;
-	debugctlmsr |= X86_DEBUGCTL_BTS;
-	debugctlmsr |= X86_DEBUGCTL_BTINT;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static void intel_pmu_disable_bts(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	unsigned long debugctlmsr;
-
-	if (!cpuc->ds)
-		return;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr &=
-		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
-
-	update_debugctlmsr(debugctlmsr);
-}
-
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -1223,26 +542,6 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
-static void p6_pmu_disable_all(void)
-{
-	u64 val;
-
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void intel_pmu_disable_all(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-		intel_pmu_disable_bts();
-}
-
 static void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1278,33 +577,6 @@ void hw_perf_disable(void)
 	x86_pmu.disable_all();
 }
 
-static void p6_pmu_enable_all(void)
-{
-	unsigned long val;
-
-	/* p6 only has one enable register */
-	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void intel_pmu_enable_all(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
-
-	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-		struct perf_event *event =
-			cpuc->events[X86_PMC_IDX_FIXED_BTS];
-
-		if (WARN_ON_ONCE(!event))
-			return;
-
-		intel_pmu_enable_bts(event->hw.config);
-	}
-}
-
 static void x86_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1578,20 +850,6 @@ void hw_perf_enable(void)
 	x86_pmu.enable_all();
 }
 
-static inline u64 intel_pmu_get_status(void)
-{
-	u64 status;
-
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
-	return status;
-}
-
-static inline void intel_pmu_ack_status(u64 ack)
-{
-	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
-
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	(void)checking_wrmsrl(hwc->config_base + idx,
@@ -1603,47 +861,6 @@ static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
 }
 
-static inline void
-intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
-{
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, mask;
-
-	mask = 0xfULL << (idx * 4);
-
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static inline void
-p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val = P6_NOP_EVENT;
-
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
-}
-
-static inline void
-intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
-{
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		intel_pmu_disable_bts();
-		return;
-	}
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_disable_fixed(hwc, idx);
-		return;
-	}
-
-	x86_pmu_disable_event(hwc, idx);
-}
-
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
@@ -1702,70 +919,6 @@ x86_perf_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static inline void
-intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
-{
-	int idx = __idx - X86_PMC_IDX_FIXED;
-	u64 ctrl_val, bits, mask;
-	int err;
-
-	/*
-	 * Enable IRQ generation (0x8),
-	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-	 * if requested:
-	 */
-	bits = 0x8ULL;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-		bits |= 0x2;
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-		bits |= 0x1;
-
-	/*
-	 * ANY bit is supported in v3 and up
-	 */
-	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
-		bits |= 0x4;
-
-	bits <<= (idx * 4);
-	mask = 0xfULL << (idx * 4);
-
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	ctrl_val |= bits;
-	err = checking_wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	u64 val;
-
-	val = hwc->config;
-	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
-}
-
-
-static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
-{
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		if (!__get_cpu_var(cpu_hw_events).enabled)
-			return;
-
-		intel_pmu_enable_bts(hwc->config);
-		return;
-	}
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_enable_fixed(hwc, idx);
-		return;
-	}
-
-	__x86_pmu_enable_event(hwc, idx);
-}
-
 static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1887,66 +1040,6 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
-{
-	struct debug_store *ds = cpuc->ds;
-	struct bts_record {
-		u64	from;
-		u64	to;
-		u64	flags;
-	};
-	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
-	struct bts_record *at, *top;
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_sample_data data;
-	struct pt_regs regs;
-
-	if (!event)
-		return;
-
-	if (!ds)
-		return;
-
-	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-	top = (struct bts_record *)(unsigned long)ds->bts_index;
-
-	if (top <= at)
-		return;
-
-	ds->bts_index = ds->bts_buffer_base;
-
-
-	data.period	= event->hw.last_period;
-	data.addr	= 0;
-	data.raw	= NULL;
-	regs.ip		= 0;
-
-	/*
-	 * Prepare a generic sample, i.e. fill in the invariant fields.
-	 * We will overwrite the from and to address before we output
-	 * the sample.
-	 */
-	perf_prepare_sample(&header, &data, event, &regs);
-
-	if (perf_output_begin(&handle, event,
-			      header.size * (top - at), 1, 1))
-		return;
-
-	for (; at < top; at++) {
-		data.ip		= at->from;
-		data.addr	= at->to;
-
-		perf_output_sample(&handle, &header, &data, event);
-	}
-
-	perf_output_end(&handle);
-
-	/* There's new data available. */
-	event->hw.interrupts++;
-	event->pending_kill = POLL_IN;
-}
-
 static void x86_pmu_stop(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1966,10 +1059,6 @@ static void x86_pmu_stop(struct perf_event *event)
 	 */
 	x86_perf_event_update(event, hwc, idx);
 
-	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
-		intel_pmu_drain_bts_buffer(cpuc);
-
 	cpuc->events[idx] = NULL;
 }
 
@@ -1996,114 +1085,6 @@ static void x86_pmu_disable(struct perf_event *event)
 	perf_event_update_userpage(event);
 }
 
-/*
- * Save and restart an expired event. Called by NMI contexts,
- * so it has to be careful about preempting normal event ops:
- */
-static int intel_pmu_save_and_restart(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-	int ret;
-
-	x86_perf_event_update(event, hwc, idx);
-	ret = x86_perf_event_set_period(event, hwc, idx);
-
-	return ret;
-}
-
-static void intel_pmu_reset(void)
-{
-	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
-	unsigned long flags;
-	int idx;
-
-	if (!x86_pmu.num_events)
-		return;
-
-	local_irq_save(flags);
-
-	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
-
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
-		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
-		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
-	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
-		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-	}
-	if (ds)
-		ds->bts_index = ds->bts_buffer_base;
-
-	local_irq_restore(flags);
-}
-
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
-{
-	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
-	int bit, loops;
-	u64 ack, status;
-
-	data.addr = 0;
-	data.raw = NULL;
-
-	cpuc = &__get_cpu_var(cpu_hw_events);
-
-	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc);
-	status = intel_pmu_get_status();
-	if (!status) {
-		perf_enable();
-		return 0;
-	}
-
-	loops = 0;
-again:
-	if (++loops > 100) {
-		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-		perf_event_print_debug();
-		intel_pmu_reset();
-		perf_enable();
-		return 1;
-	}
-
-	inc_irq_stat(apic_perf_irqs);
-	ack = status;
-	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-		struct perf_event *event = cpuc->events[bit];
-
-		clear_bit(bit, (unsigned long *) &status);
-		if (!test_bit(bit, cpuc->active_mask))
-			continue;
-
-		if (!intel_pmu_save_and_restart(event))
-			continue;
-
-		data.period = event->hw.last_period;
-
-		if (perf_event_overflow(event, 1, &data, regs))
-			intel_pmu_disable_event(&event->hw, bit);
-	}
-
-	intel_pmu_ack_status(ack);
-
-	/*
-	 * Repeat if there is more work to be done:
-	 */
-	status = intel_pmu_get_status();
-	if (status)
-		goto again;
-
-	perf_enable();
-
-	return 1;
-}
-
 static int x86_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
@@ -2216,37 +1197,20 @@ perf_event_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+	.notifier_call		= perf_event_nmi_handler,
+	.next			= NULL,
+	.priority		= 1
+};
+
 static struct event_constraint unconstrained;
 static struct event_constraint emptyconstraint;
 
-static struct event_constraint bts_constraint =
-	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
-
 static struct event_constraint *
-intel_special_constraints(struct perf_event *event)
-{
-	unsigned int hw_event;
-
-	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
-
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (event->hw.sample_period == 1))) {
-
-		return &bts_constraint;
-	}
-	return NULL;
-}
-
-static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct event_constraint *c;
 
-	c = intel_special_constraints(event);
-	if (c)
-		return c;
-
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
 			if ((event->hw.config & c->cmask) == c->code)
@@ -2257,148 +1221,6 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	return &unconstrained;
 }
 
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline int amd_is_nb_event(struct hw_perf_event *hwc)
-{
-	return (hwc->config & 0xe0) == 0xe0;
-}
-
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
-				      struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	struct amd_nb *nb = cpuc->amd_nb;
-	int i;
-
-	/*
-	 * only care about NB events
-	 */
-	if (!(nb && amd_is_nb_event(hwc)))
-		return;
-
-	/*
-	 * need to scan whole list because event may not have
-	 * been assigned during scheduling
-	 *
-	 * no race condition possible because event can only
-	 * be removed on one CPU at a time AND PMU is disabled
-	 * when we come here
-	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
-		if (nb->owners[i] == event) {
-			cmpxchg(nb->owners+i, event, NULL);
-			break;
-		}
-	}
-}
-
- /*
-  * AMD64 NorthBridge events need special treatment because
-  * counter access needs to be synchronized across all cores
-  * of a package. Refer to BKDG section 3.12
-  *
-  * NB events are events measuring L3 cache, Hypertransport
-  * traffic. They are identified by an event code >= 0xe00.
-  * They measure events on the NorthBride which is shared
-  * by all cores on a package. NB events are counted on a
-  * shared set of counters. When a NB event is programmed
-  * in a counter, the data actually comes from a shared
-  * counter. Thus, access to those counters needs to be
-  * synchronized.
-  *
-  * We implement the synchronization such that no two cores
-  * can be measuring NB events using the same counters. Thus,
-  * we maintain a per-NB allocation table. The available slot
-  * is propagated using the event_constraint structure.
-  *
-  * We provide only one choice for each NB event based on
-  * the fact that only NB events have restrictions. Consequently,
-  * if a counter is available, there is a guarantee the NB event
-  * will be assigned to it. If no slot is available, an empty
-  * constraint is returned and scheduling will eventually fail
-  * for this event.
-  *
-  * Note that all cores attached the same NB compete for the same
-  * counters to host NB events, this is why we use atomic ops. Some
-  * multi-chip CPUs may have more than one NB.
-  *
-  * Given that resources are allocated (cmpxchg), they must be
-  * eventually freed for others to use. This is accomplished by
-  * calling amd_put_event_constraints().
-  *
-  * Non NB events are not impacted by this restriction.
-  */
-static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	struct amd_nb *nb = cpuc->amd_nb;
-	struct perf_event *old = NULL;
-	int max = x86_pmu.num_events;
-	int i, j, k = -1;
-
-	/*
-	 * if not NB event or no NB, then no constraints
-	 */
-	if (!(nb && amd_is_nb_event(hwc)))
-		return &unconstrained;
-
-	/*
-	 * detect if already present, if so reuse
-	 *
-	 * cannot merge with actual allocation
-	 * because of possible holes
-	 *
-	 * event can already be present yet not assigned (in hwc->idx)
-	 * because of successive calls to x86_schedule_events() from
-	 * hw_perf_group_sched_in() without hw_perf_enable()
-	 */
-	for (i = 0; i < max; i++) {
-		/*
-		 * keep track of first free slot
-		 */
-		if (k == -1 && !nb->owners[i])
-			k = i;
-
-		/* already present, reuse */
-		if (nb->owners[i] == event)
-			goto done;
-	}
-	/*
-	 * not present, so grab a new slot
-	 * starting either at:
-	 */
-	if (hwc->idx != -1) {
-		/* previous assignment */
-		i = hwc->idx;
-	} else if (k != -1) {
-		/* start from free slot found */
-		i = k;
-	} else {
-		/*
-		 * event not found, no slot found in
-		 * first pass, try again from the
-		 * beginning
-		 */
-		i = 0;
-	}
-	j = i;
-	do {
-		old = cmpxchg(nb->owners+i, NULL, event);
-		if (!old)
-			break;
-		if (++i == max)
-			i = 0;
-	} while (i != j);
-done:
-	if (!old)
-		return &nb->event_constraints[i];
-
-	return &emptyconstraint;
-}
-
 static int x86_event_sched_in(struct perf_event *event,
 			  struct perf_cpu_context *cpuctx)
 {
@@ -2509,335 +1331,9 @@ undo:
 	return ret;
 }
 
-static __read_mostly struct notifier_block perf_event_nmi_notifier = {
-	.notifier_call		= perf_event_nmi_handler,
-	.next			= NULL,
-	.priority		= 1
-};
-
-static __initconst struct x86_pmu p6_pmu = {
-	.name			= "p6",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= p6_pmu_disable_all,
-	.enable_all		= p6_pmu_enable_all,
-	.enable			= p6_pmu_enable_event,
-	.disable		= p6_pmu_disable_event,
-	.eventsel		= MSR_P6_EVNTSEL0,
-	.perfctr		= MSR_P6_PERFCTR0,
-	.event_map		= p6_pmu_event_map,
-	.raw_event		= p6_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
-	.apic			= 1,
-	.max_period		= (1ULL << 31) - 1,
-	.version		= 0,
-	.num_events		= 2,
-	/*
-	 * Events have 40 bits implemented. However they are designed such
-	 * that bits [32-39] are sign extensions of bit 31. As such the
-	 * effective width of a event for P6-like PMU is 32 bits only.
-	 *
-	 * See IA-32 Intel Architecture Software developer manual Vol 3B
-	 */
-	.event_bits		= 32,
-	.event_mask		= (1ULL << 32) - 1,
-	.get_event_constraints	= intel_get_event_constraints,
-	.event_constraints	= intel_p6_event_constraints
-};
-
-static __initconst struct x86_pmu core_pmu = {
-	.name			= "core",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.get_event_constraints	= intel_get_event_constraints,
-	.event_constraints	= intel_core_event_constraints,
-};
-
-static __initconst struct x86_pmu intel_pmu = {
-	.name			= "Intel",
-	.handle_irq		= intel_pmu_handle_irq,
-	.disable_all		= intel_pmu_disable_all,
-	.enable_all		= intel_pmu_enable_all,
-	.enable			= intel_pmu_enable_event,
-	.disable		= intel_pmu_disable_event,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.raw_event		= intel_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.enable_bts		= intel_pmu_enable_bts,
-	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_constraints	= intel_get_event_constraints
-};
-
-static __initconst struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints,
-	.put_event_constraints	= amd_put_event_constraints
-};
-
-static __init int p6_pmu_init(void)
-{
-	switch (boot_cpu_data.x86_model) {
-	case 1:
-	case 3:  /* Pentium Pro */
-	case 5:
-	case 6:  /* Pentium II */
-	case 7:
-	case 8:
-	case 11: /* Pentium III */
-	case 9:
-	case 13:
-		/* Pentium M */
-		break;
-	default:
-		pr_cont("unsupported p6 CPU model %d ",
-			boot_cpu_data.x86_model);
-		return -ENODEV;
-	}
-
-	x86_pmu = p6_pmu;
-
-	return 0;
-}
-
-static __init int intel_pmu_init(void)
-{
-	union cpuid10_edx edx;
-	union cpuid10_eax eax;
-	unsigned int unused;
-	unsigned int ebx;
-	int version;
-
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-		/* check for P6 processor family */
-	   if (boot_cpu_data.x86 == 6) {
-		return p6_pmu_init();
-	   } else {
-		return -ENODEV;
-	   }
-	}
-
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Branch Misses Retired hw_event or not.
-	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
-		return -ENODEV;
-
-	version = eax.split.version_id;
-	if (version < 2)
-		x86_pmu = core_pmu;
-	else
-		x86_pmu = intel_pmu;
-
-	x86_pmu.version			= version;
-	x86_pmu.num_events		= eax.split.num_events;
-	x86_pmu.event_bits		= eax.split.bit_width;
-	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
-
-	/*
-	 * Quirk: v2 perfmon does not report fixed-purpose events, so
-	 * assume at least 3 events:
-	 */
-	if (version > 1)
-		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
-
-	/*
-	 * Install the hw-cache-events table:
-	 */
-	switch (boot_cpu_data.x86_model) {
-	case 14: /* 65 nm core solo/duo, "Yonah" */
-		pr_cont("Core events, ");
-		break;
-
-	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
-	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
-	case 29: /* six-core 45 nm xeon "Dunnington" */
-		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		x86_pmu.event_constraints = intel_core2_event_constraints;
-		pr_cont("Core2 events, ");
-		break;
-
-	case 26: /* 45 nm nehalem, "Bloomfield" */
-	case 30: /* 45 nm nehalem, "Lynnfield" */
-		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		x86_pmu.event_constraints = intel_nehalem_event_constraints;
-		pr_cont("Nehalem/Corei7 events, ");
-		break;
-	case 28:
-		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		x86_pmu.event_constraints = intel_gen_event_constraints;
-		pr_cont("Atom events, ");
-		break;
-
-	case 37: /* 32 nm nehalem, "Clarkdale" */
-	case 44: /* 32 nm nehalem, "Gulftown" */
-		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-
-		x86_pmu.event_constraints = intel_westmere_event_constraints;
-		pr_cont("Westmere events, ");
-		break;
-	default:
-		/*
-		 * default constraints for v2 and up
-		 */
-		x86_pmu.event_constraints = intel_gen_event_constraints;
-		pr_cont("generic architected perfmon, ");
-	}
-	return 0;
-}
-
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
-{
-	struct amd_nb *nb;
-	int i;
-
-	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
-	if (!nb)
-		return NULL;
-
-	memset(nb, 0, sizeof(*nb));
-	nb->nb_id = nb_id;
-
-	/*
-	 * initialize all possible NB constraints
-	 */
-	for (i = 0; i < x86_pmu.num_events; i++) {
-		set_bit(i, nb->event_constraints[i].idxmsk);
-		nb->event_constraints[i].weight = 1;
-	}
-	return nb;
-}
-
-static void amd_pmu_cpu_online(int cpu)
-{
-	struct cpu_hw_events *cpu1, *cpu2;
-	struct amd_nb *nb = NULL;
-	int i, nb_id;
-
-	if (boot_cpu_data.x86_max_cores < 2)
-		return;
-
-	/*
-	 * function may be called too early in the
-	 * boot process, in which case nb_id is bogus
-	 */
-	nb_id = amd_get_nb_id(cpu);
-	if (nb_id == BAD_APICID)
-		return;
-
-	cpu1 = &per_cpu(cpu_hw_events, cpu);
-	cpu1->amd_nb = NULL;
-
-	raw_spin_lock(&amd_nb_lock);
-
-	for_each_online_cpu(i) {
-		cpu2 = &per_cpu(cpu_hw_events, i);
-		nb = cpu2->amd_nb;
-		if (!nb)
-			continue;
-		if (nb->nb_id == nb_id)
-			goto found;
-	}
-
-	nb = amd_alloc_nb(cpu, nb_id);
-	if (!nb) {
-		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
-		raw_spin_unlock(&amd_nb_lock);
-		return;
-	}
-found:
-	nb->refcnt++;
-	cpu1->amd_nb = nb;
-
-	raw_spin_unlock(&amd_nb_lock);
-}
-
-static void amd_pmu_cpu_offline(int cpu)
-{
-	struct cpu_hw_events *cpuhw;
-
-	if (boot_cpu_data.x86_max_cores < 2)
-		return;
-
-	cpuhw = &per_cpu(cpu_hw_events, cpu);
-
-	raw_spin_lock(&amd_nb_lock);
-
-	if (--cpuhw->amd_nb->refcnt == 0)
-		kfree(cpuhw->amd_nb);
-
-	cpuhw->amd_nb = NULL;
-
-	raw_spin_unlock(&amd_nb_lock);
-}
-
-static __init int amd_pmu_init(void)
-{
-	/* Performance-monitoring supported from K7 and later: */
-	if (boot_cpu_data.x86 < 6)
-		return -ENODEV;
-
-	x86_pmu = amd_pmu;
-
-	/* Events are common for all AMDs */
-	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-	       sizeof(hw_cache_event_ids));
-
-	/*
-	 * explicitly initialize the boot cpu, other cpus will get
-	 * the cpu hotplug callbacks from smp_init()
-	 */
-	amd_pmu_cpu_online(smp_processor_id());
-	return 0;
-}
+#include "perf_event_amd.c"
+#include "perf_event_p6.c"
+#include "perf_event_intel.c"
 
 static void __init pmu_check_apic(void)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..6d28e08563e8
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
+#ifdef CONFIG_CPU_SUP_AMD
+
+static raw_spinlock_t amd_nb_lock;
+
+static __initconst u64 amd_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
+		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+	return amd_perfmon_event_map[hw_event];
+}
+
+static u64 amd_pmu_raw_event(u64 hw_event)
+{
+#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
+#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
+#define K7_EVNTSEL_INV_MASK	0x000800000ULL
+#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
+
+#define K7_EVNTSEL_MASK			\
+	(K7_EVNTSEL_EVENT_MASK |	\
+	 K7_EVNTSEL_UNIT_MASK  |	\
+	 K7_EVNTSEL_EDGE_MASK  |	\
+	 K7_EVNTSEL_INV_MASK   |	\
+	 K7_EVNTSEL_REG_MASK)
+
+	return hw_event & K7_EVNTSEL_MASK;
+}
+
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+	return (hwc->config & 0xe0) == 0xe0;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	int i;
+
+	/*
+	 * only care about NB events
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return;
+
+	/*
+	 * need to scan whole list because event may not have
+	 * been assigned during scheduling
+	 *
+	 * no race condition possible because event can only
+	 * be removed on one CPU at a time AND PMU is disabled
+	 * when we come here
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		if (nb->owners[i] == event) {
+			cmpxchg(nb->owners+i, event, NULL);
+			break;
+		}
+	}
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBride which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling amd_put_event_constraints().
+  *
+  * Non NB events are not impacted by this restriction.
+  */
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct amd_nb *nb = cpuc->amd_nb;
+	struct perf_event *old = NULL;
+	int max = x86_pmu.num_events;
+	int i, j, k = -1;
+
+	/*
+	 * if not NB event or no NB, then no constraints
+	 */
+	if (!(nb && amd_is_nb_event(hwc)))
+		return &unconstrained;
+
+	/*
+	 * detect if already present, if so reuse
+	 *
+	 * cannot merge with actual allocation
+	 * because of possible holes
+	 *
+	 * event can already be present yet not assigned (in hwc->idx)
+	 * because of successive calls to x86_schedule_events() from
+	 * hw_perf_group_sched_in() without hw_perf_enable()
+	 */
+	for (i = 0; i < max; i++) {
+		/*
+		 * keep track of first free slot
+		 */
+		if (k == -1 && !nb->owners[i])
+			k = i;
+
+		/* already present, reuse */
+		if (nb->owners[i] == event)
+			goto done;
+	}
+	/*
+	 * not present, so grab a new slot
+	 * starting either at:
+	 */
+	if (hwc->idx != -1) {
+		/* previous assignment */
+		i = hwc->idx;
+	} else if (k != -1) {
+		/* start from free slot found */
+		i = k;
+	} else {
+		/*
+		 * event not found, no slot found in
+		 * first pass, try again from the
+		 * beginning
+		 */
+		i = 0;
+	}
+	j = i;
+	do {
+		old = cmpxchg(nb->owners+i, NULL, event);
+		if (!old)
+			break;
+		if (++i == max)
+			i = 0;
+	} while (i != j);
+done:
+	if (!old)
+		return &nb->event_constraints[i];
+
+	return &emptyconstraint;
+}
+
+static __initconst struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints
+};
+
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+	struct amd_nb *nb;
+	int i;
+
+	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+	if (!nb)
+		return NULL;
+
+	memset(nb, 0, sizeof(*nb));
+	nb->nb_id = nb_id;
+
+	/*
+	 * initialize all possible NB constraints
+	 */
+	for (i = 0; i < x86_pmu.num_events; i++) {
+		set_bit(i, nb->event_constraints[i].idxmsk);
+		nb->event_constraints[i].weight = 1;
+	}
+	return nb;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+	struct cpu_hw_events *cpu1, *cpu2;
+	struct amd_nb *nb = NULL;
+	int i, nb_id;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	/*
+	 * function may be called too early in the
+	 * boot process, in which case nb_id is bogus
+	 */
+	nb_id = amd_get_nb_id(cpu);
+	if (nb_id == BAD_APICID)
+		return;
+
+	cpu1 = &per_cpu(cpu_hw_events, cpu);
+	cpu1->amd_nb = NULL;
+
+	raw_spin_lock(&amd_nb_lock);
+
+	for_each_online_cpu(i) {
+		cpu2 = &per_cpu(cpu_hw_events, i);
+		nb = cpu2->amd_nb;
+		if (!nb)
+			continue;
+		if (nb->nb_id == nb_id)
+			goto found;
+	}
+
+	nb = amd_alloc_nb(cpu, nb_id);
+	if (!nb) {
+		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
+		raw_spin_unlock(&amd_nb_lock);
+		return;
+	}
+found:
+	nb->refcnt++;
+	cpu1->amd_nb = nb;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+	struct cpu_hw_events *cpuhw;
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return;
+
+	cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock(&amd_nb_lock);
+
+	if (--cpuhw->amd_nb->refcnt == 0)
+		kfree(cpuhw->amd_nb);
+
+	cpuhw->amd_nb = NULL;
+
+	raw_spin_unlock(&amd_nb_lock);
+}
+
+static __init int amd_pmu_init(void)
+{
+	/* Performance-monitoring supported from K7 and later: */
+	if (boot_cpu_data.x86 < 6)
+		return -ENODEV;
+
+	x86_pmu = amd_pmu;
+
+	/* Events are common for all AMDs */
+	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+	       sizeof(hw_cache_event_ids));
+
+	/*
+	 * explicitly initialize the boot cpu, other cpus will get
+	 * the cpu hotplug callbacks from smp_init()
+	 */
+	amd_pmu_cpu_online(smp_processor_id());
+	return 0;
+}
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static int amd_pmu_init(void)
+{
+	return 0;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+}
+
+#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..cf6590cf4a5f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+};
+
+static struct event_constraint intel_core_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+	return intel_perfmon_event_map[hw_event];
+}
+
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 hw_event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(INTEL_ARCH_EVTSEL_MASK |	\
+	 INTEL_ARCH_UNIT_MASK   |	\
+	 INTEL_ARCH_EDGE_MASK   |	\
+	 INTEL_ARCH_INV_MASK    |	\
+	 INTEL_ARCH_CNT_MASK)
+
+	return hw_event & CORE_EVNTSEL_MASK;
+}
+
+static void intel_pmu_enable_bts(u64 config)
+{
+	unsigned long debugctlmsr;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr |= X86_DEBUGCTL_TR;
+	debugctlmsr |= X86_DEBUGCTL_BTS;
+	debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long debugctlmsr;
+
+	if (!cpuc->ds)
+		return;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr &=
+		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		intel_pmu_disable_bts();
+}
+
+static void intel_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+		struct perf_event *event =
+			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+
+		if (WARN_ON_ONCE(!event))
+			return;
+
+		intel_pmu_enable_bts(event->hw.config);
+	}
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_drain_bts_buffer(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct bts_record {
+		u64	from;
+		u64	to;
+		u64	flags;
+	};
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!event)
+		return;
+
+	if (!ds)
+		return;
+
+	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+	if (top <= at)
+		return;
+
+	ds->bts_index = ds->bts_buffer_base;
+
+
+	data.period	= event->hw.last_period;
+	data.addr	= 0;
+	data.raw	= NULL;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event,
+			      header.size * (top - at), 1, 1))
+		return;
+
+	for (; at < top; at++) {
+		data.ip		= at->from;
+		data.addr	= at->to;
+
+		perf_output_sample(&handle, &header, &data, event);
+	}
+
+	perf_output_end(&handle);
+
+	/* There's new data available. */
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
+}
+
+static inline void
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		intel_pmu_disable_bts();
+		intel_pmu_drain_bts_buffer();
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+
+	/*
+	 * ANY bit is supported in v3 and up
+	 */
+	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+		bits |= 0x4;
+
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		if (!__get_cpu_var(cpu_hw_events).enabled)
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc, idx);
+		return;
+	}
+
+	__x86_pmu_enable_event(hwc, idx);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+static int intel_pmu_save_and_restart(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	int ret;
+
+	x86_perf_event_update(event, hwc, idx);
+	ret = x86_perf_event_set_period(event, hwc, idx);
+
+	return ret;
+}
+
+static void intel_pmu_reset(void)
+{
+	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_events)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+	if (ds)
+		ds->bts_index = ds->bts_buffer_base;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	int bit, loops;
+	u64 ack, status;
+
+	data.addr = 0;
+	data.raw = NULL;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	perf_disable();
+	intel_pmu_drain_bts_buffer();
+	status = intel_pmu_get_status();
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
+
+	loops = 0;
+again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+		perf_event_print_debug();
+		intel_pmu_reset();
+		perf_enable();
+		return 1;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+	ack = status;
+	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_event *event = cpuc->events[bit];
+
+		clear_bit(bit, (unsigned long *) &status);
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		data.period = event->hw.last_period;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			intel_pmu_disable_event(&event->hw, bit);
+	}
+
+	intel_pmu_ack_status(ack);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = intel_pmu_get_status();
+	if (status)
+		goto again;
+
+	perf_enable();
+
+	return 1;
+}
+
+static struct event_constraint bts_constraint =
+	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
+{
+	unsigned int hw_event;
+
+	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (event->hw.sample_period == 1))) {
+
+		return &bts_constraint;
+	}
+	return NULL;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	c = intel_special_constraints(event);
+	if (c)
+		return c;
+
+	return x86_get_event_constraints(cpuc, event);
+}
+
+static __initconst struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+};
+
+static __initconst struct x86_pmu intel_pmu = {
+	.name			= "Intel",
+	.handle_irq		= intel_pmu_handle_irq,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
+	.enable			= intel_pmu_enable_event,
+	.disable		= intel_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.enable_bts		= intel_pmu_enable_bts,
+	.disable_bts		= intel_pmu_disable_bts,
+	.get_event_constraints	= intel_get_event_constraints
+};
+
+static __init int intel_pmu_init(void)
+{
+	union cpuid10_edx edx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+	int version;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+	   if (boot_cpu_data.x86 == 6) {
+		return p6_pmu_init();
+	   } else {
+		return -ENODEV;
+	   }
+	}
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Branch Misses Retired hw_event or not.
+	 */
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return -ENODEV;
+
+	version = eax.split.version_id;
+	if (version < 2)
+		x86_pmu = core_pmu;
+	else
+		x86_pmu = intel_pmu;
+
+	x86_pmu.version			= version;
+	x86_pmu.num_events		= eax.split.num_events;
+	x86_pmu.event_bits		= eax.split.bit_width;
+	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose events, so
+	 * assume at least 3 events:
+	 */
+	if (version > 1)
+		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+
+	/*
+	 * Install the hw-cache-events table:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 14: /* 65 nm core solo/duo, "Yonah" */
+		pr_cont("Core events, ");
+		break;
+
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_core2_event_constraints;
+		pr_cont("Core2 events, ");
+		break;
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_nehalem_event_constraints;
+		pr_cont("Nehalem/Corei7 events, ");
+		break;
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("Atom events, ");
+		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
+	default:
+		/*
+		 * default constraints for v2 and up
+		 */
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("generic architected perfmon, ");
+	}
+	return 0;
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static int intel_pmu_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..1ca5ba078afd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+	return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT			0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 hw_event)
+{
+#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define P6_EVNTSEL_INV_MASK		0x00800000ULL
+#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define P6_EVNTSEL_MASK			\
+	(P6_EVNTSEL_EVENT_MASK |	\
+	 P6_EVNTSEL_UNIT_MASK  |	\
+	 P6_EVNTSEL_EDGE_MASK  |	\
+	 P6_EVNTSEL_INV_MASK   |	\
+	 P6_EVNTSEL_REG_MASK)
+
+	return hw_event & P6_EVNTSEL_MASK;
+}
+
+static struct event_constraint p6_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT_END
+};
+
+static void p6_pmu_disable_all(void)
+{
+	u64 val;
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void p6_pmu_enable_all(void)
+{
+	unsigned long val;
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static inline void
+p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val = P6_NOP_EVENT;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+static __initconst struct x86_pmu p6_pmu = {
+	.name			= "p6",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= p6_pmu_disable_all,
+	.enable_all		= p6_pmu_enable_all,
+	.enable			= p6_pmu_enable_event,
+	.disable		= p6_pmu_disable_event,
+	.eventsel		= MSR_P6_EVNTSEL0,
+	.perfctr		= MSR_P6_PERFCTR0,
+	.event_map		= p6_pmu_event_map,
+	.raw_event		= p6_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_events		= 2,
+	/*
+	 * Events have 40 bits implemented. However they are designed such
+	 * that bits [32-39] are sign extensions of bit 31. As such the
+	 * effective width of a event for P6-like PMU is 32 bits only.
+	 *
+	 * See IA-32 Intel Architecture Software developer manual Vol 3B
+	 */
+	.event_bits		= 32,
+	.event_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= p6_event_constraints,
+};
+
+static __init int p6_pmu_init(void)
+{
+	switch (boot_cpu_data.x86_model) {
+	case 1:
+	case 3:  /* Pentium Pro */
+	case 5:
+	case 6:  /* Pentium II */
+	case 7:
+	case 8:
+	case 11: /* Pentium III */
+	case 9:
+	case 13:
+		/* Pentium M */
+		break;
+	default:
+		pr_cont("unsupported p6 CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	x86_pmu = p6_pmu;
+
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */

From 4385d580f2278abab6d336e52522e9a6f5452a11 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 26 Feb 2010 12:08:34 -0300
Subject: [PATCH 627/640] perf tools: Flush maps on COMM events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even though we don't register the counters until the child is right about
to exec(), we're still going to get at least a few events while the
fork()'d child is still executing 'perf' and in particular we're going to
get the MMAP events.

We can't distinguish the ones in the newly executed process because the
PID will be the same.

One way to solve this would be to have a PERF_RECORD_EXEC event, and when
this is seen 'perf' can flush it's map cache.  We can't use
PERF_RECORD_COMM since that's generated by other things, not just exec().

Actually, thinking about it some more, using PERF_RECORD_COMM might be a
good enough approximation.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1267196914-16238-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/thread.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index c090654cb6c0..21b92162282b 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -31,15 +31,41 @@ static struct thread *thread__new(pid_t pid)
 	return self;
 }
 
+static void map_groups__flush(struct map_groups *self)
+{
+	int type;
+
+	for (type = 0; type < MAP__NR_TYPES; type++) {
+		struct rb_root *root = &self->maps[type];
+		struct rb_node *next = rb_first(root);
+
+		while (next) {
+			struct map *pos = rb_entry(next, struct map, rb_node);
+			next = rb_next(&pos->rb_node);
+			rb_erase(&pos->rb_node, root);
+			/*
+			 * We may have references to this map, for
+			 * instance in some hist_entry instances, so
+			 * just move them to a separate list.
+			 */
+			list_add_tail(&pos->node, &self->removed_maps[pos->type]);
+		}
+	}
+}
+
 int thread__set_comm(struct thread *self, const char *comm)
 {
+	int err;
+
 	if (self->comm)
 		free(self->comm);
 	self->comm = strdup(comm);
-	if (self->comm == NULL)
-		return -ENOMEM;
-	self->comm_set = true;
-	return 0;
+	err = self->comm == NULL ? -ENOMEM : 0;
+	if (!err) {
+		self->comm_set = true;
+		map_groups__flush(&self->mg);
+	}
+	return err;
 }
 
 int thread__comm_len(struct thread *self)

From 24691ea964cc0123e386b661e03a86a481c6ee79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 26 Feb 2010 16:36:23 +0100
Subject: [PATCH 628/640] perf_event: Fix preempt warning in perf_clock()

A recent commit introduced a preemption warning for
perf_clock(), use raw_smp_processor_id() to avoid this, it
really doesn't matter which cpu we use here.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1267198583.22519.684.camel@laptop>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 05b6c6b825e3..aa6155b5e24c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -249,7 +249,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-	return cpu_clock(smp_processor_id());
+	return cpu_clock(raw_smp_processor_id());
 }
 
 /*

From 1dd2980d990068e20045b90c424518cc7f3657ff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 26 Feb 2010 17:07:35 +0100
Subject: [PATCH 629/640] perf_event, amd: Fix spinlock initialization

Avoid kernels from exploding on AMD machines when they have any
lock debugging bits enabled.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 6d28e08563e8..8f3dbfda3c4f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,6 +1,6 @@
 #ifdef CONFIG_CPU_SUP_AMD
 
-static raw_spinlock_t amd_nb_lock;
+static DEFINE_RAW_SPINLOCK(amd_nb_lock);
 
 static __initconst u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]

From 44ee63587dce85593c22497140db16f4e5027860 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 17 Feb 2010 10:50:50 +0900
Subject: [PATCH 630/640] percpu: Add __percpu sparse annotations to
 hw_breakpoint

Add __percpu sparse annotations to hw_breakpoint.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

In kernel/hw_breakpoint.c, per_cpu(nr_task_bp_pinned, cpu)'s will
trigger spurious noderef related warnings from sparse.  Changing it to
&per_cpu(nr_task_bp_pinned[0], cpu) will work around the problem but
deemed to ugly by the maintainer.  Leave it alone until better
solution can be found.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <4B7B4B7A.9050902@kernel.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/hw_breakpoint.h           |  8 ++++----
 kernel/hw_breakpoint.c                  | 10 +++++-----
 samples/hw_breakpoint/data_breakpoint.c |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 5977b724f7c6..c70d27af03f9 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -66,14 +66,14 @@ register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	triggered,
 				int cpu);
 
-extern struct perf_event **
+extern struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
-extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
 
 extern int dbg_reserve_bp_slot(struct perf_event *bp);
 extern int dbg_release_bp_slot(struct perf_event *bp);
@@ -100,7 +100,7 @@ static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	 triggered,
 				int cpu)		{ return NULL; }
-static inline struct perf_event **
+static inline struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered)	{ return NULL; }
 static inline int
@@ -109,7 +109,7 @@ static inline int
 __register_perf_hw_breakpoint(struct perf_event *bp) 	{ return -ENOSYS; }
 static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
 static inline void
-unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)	{ }
 static inline int
 reserve_bp_slot(struct perf_event *bp)			{return -ENOSYS; }
 static inline void release_bp_slot(struct perf_event *bp) 		{ }
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 967e66143e11..6542eacb3fa5 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  *
  * @return a set of per_cpu pointers to perf events
  */
-struct perf_event **
+struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered)
 {
-	struct perf_event **cpu_events, **pevent, *bp;
+	struct perf_event * __percpu *cpu_events, **pevent, *bp;
 	long err;
 	int cpu;
 
 	cpu_events = alloc_percpu(typeof(*cpu_events));
 	if (!cpu_events)
-		return ERR_PTR(-ENOMEM);
+		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
 	put_online_cpus();
 
 	free_percpu(cpu_events);
-	return ERR_PTR(err);
+	return (void __percpu __force *)ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
 
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
  * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
  * @cpu_events: the per cpu set of events to unregister
  */
-void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 {
 	int cpu;
 	struct perf_event **pevent;
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index c69cbe9b2426..bd0f337afcab 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -34,7 +34,7 @@
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 
-struct perf_event **sample_hbp;
+struct perf_event * __percpu *sample_hbp;
 
 static char ksym_name[KSYM_NAME_LEN] = "pid_max";
 module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
@@ -61,8 +61,8 @@ static int __init hw_break_module_init(void)
 	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 
 	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
-	if (IS_ERR(sample_hbp)) {
-		ret = PTR_ERR(sample_hbp);
+	if (IS_ERR((void __force *)sample_hbp)) {
+		ret = PTR_ERR((void __force *)sample_hbp);
 		goto fail;
 	}
 

From 84c6f88fc8265d7a712d7d6ed8fc1a878dfc84d1 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Thu, 4 Feb 2010 16:08:15 +0900
Subject: [PATCH 631/640] perf lock: Fix and add misc documentally things

I've forgot to add 'perf lock' line to command-list.txt,
so users of perf could not find perf lock when they type 'perf'.

Fixing command-list.txt requires document
(tools/perf/Documentation/perf-lock.txt).
But perf lock is too much "under construction" to write a
stable document, so this is something like pseudo document for now.

And I wrote description of perf lock at help section of
CONFIG_LOCK_STAT, this will navigate users of lock trace events.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
LKML-Reference: <1265267295-8388-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 lib/Kconfig.debug                      |  6 ++++++
 tools/perf/Documentation/perf-lock.txt | 29 ++++++++++++++++++++++++++
 tools/perf/command-list.txt            |  1 +
 3 files changed, 36 insertions(+)
 create mode 100644 tools/perf/Documentation/perf-lock.txt

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..65f964e7fe78 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,6 +520,12 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
+	 You can analyze lock events with "perf lock", subcommand of perf.
+	 If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING.
+
+	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
+ 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
new file mode 100644
index 000000000000..b317102138c8
--- /dev/null
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -0,0 +1,29 @@
+perf-lock(1)
+============
+
+NAME
+----
+perf-lock - Analyze lock events
+
+SYNOPSIS
+--------
+[verse]
+'perf lock' {record|report|trace}
+
+DESCRIPTION
+-----------
+You can analyze various lock behaviours
+and statistics with this 'perf lock' command.
+
+  'perf lock record <command>' records lock events
+  between start and end <command>. And this command
+  produces the file "perf.data" which contains tracing
+  results of lock events.
+
+  'perf lock trace' shows raw lock events.
+
+  'perf lock report' reports statistical data.
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 9afcff2e3ae5..db6ee94d4a8e 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -18,3 +18,4 @@ perf-top			mainporcelain common
 perf-trace			mainporcelain common
 perf-probe			mainporcelain common
 perf-kmem			mainporcelain common
+perf-lock			mainporcelain common

From b67577dfb45580c498bfdb1bc76c00c3b2ad6310 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 3 Feb 2010 09:09:33 +0100
Subject: [PATCH 632/640] perf lock: Drop the buffers multiplexing dependency

We need to deal with time ordered events to build a correct
state machine of lock events. This is why we multiplex the lock
events buffers. But the ordering is done from the kernel, on
the tracing fast path, leading to high contention between cpus.

Without multiplexing, the events appears in a weak order.
If we have four events, each split per cpu, perf record will
read the events buffers in the following order:

[ CPU0 ev0, CPU0 ev1, CPU0 ev3, CPU0 ev4, CPU1 ev0, CPU1 ev0....]

To handle a post processing reordering, we could just read and sort
the whole in memory, but it just doesn't scale with high amounts
of events: lock events can fill huge amounts in few times.

Basically we need to sort in memory and find a "grace period"
point when we know that a given slice of previously sorted events
can be committed for post-processing, so that we can unload the
memory usage step by step and keep a scalable sorting list.

There is no strong rules about how to define such "grace period".
What does this patch is:

We define a FLUSH_PERIOD value that defines a grace period in
seconds.
We want to have a slice of events covering 2 * FLUSH_PERIOD in our
sorted list.
If FLUSH_PERIOD is big enough, it ensures every events that occured
in the first half of the timeslice have all been buffered and there
are none remaining and there won't be further to put inside this
first timeslice. Then once we reach the 2 * FLUSH_PERIOD
timeslice, we flush the first half to be gentle with the memory
(the second half can still get new events in the middle, so wait
another period to flush it)

FLUSH_PERIOD is defined to 5 seconds. Say the first event started on
time t0. We can safely assume that at the time we are processing
events of t0 + 10 seconds, ther won't be anymore events to read
from perf.data that occured between t0 and t0 + 5 seconds. Hence
we can safely flush the first half.

To point out funky bugs, we have a guardian that checks a new event
timestamp is not below the last event's timestamp flushed and that
displays a warning in this case.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
---
 tools/perf/builtin-lock.c | 148 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 146 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fb9ab2ad3f92..e12c844df1e2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -460,6 +460,150 @@ process_raw_event(void *data, int cpu,
 		process_lock_release_event(data, event, cpu, timestamp, thread);
 }
 
+struct raw_event_queue {
+	u64			timestamp;
+	int			cpu;
+	void			*data;
+	struct thread		*thread;
+	struct list_head	list;
+};
+
+static LIST_HEAD(raw_event_head);
+
+#define FLUSH_PERIOD	(5 * NSEC_PER_SEC)
+
+static u64 flush_limit = ULLONG_MAX;
+static u64 last_flush = 0;
+struct raw_event_queue *last_inserted;
+
+static void flush_raw_event_queue(u64 limit)
+{
+	struct raw_event_queue *tmp, *iter;
+
+	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
+		if (iter->timestamp > limit)
+			return;
+
+		if (iter == last_inserted)
+			last_inserted = NULL;
+
+		process_raw_event(iter->data, iter->cpu, iter->timestamp,
+				  iter->thread);
+
+		last_flush = iter->timestamp;
+		list_del(&iter->list);
+		free(iter->data);
+		free(iter);
+	}
+}
+
+static void __queue_raw_event_end(struct raw_event_queue *new)
+{
+	struct raw_event_queue *iter;
+
+	list_for_each_entry_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_before(struct raw_event_queue *new,
+				     struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_after(struct raw_event_queue *new,
+				     struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue(iter, &raw_event_head, list) {
+		if (iter->timestamp > new->timestamp) {
+			list_add_tail(&new->list, &iter->list);
+			return;
+		}
+	}
+	list_add_tail(&new->list, &raw_event_head);
+}
+
+/* The queue is ordered by time */
+static void __queue_raw_event(struct raw_event_queue *new)
+{
+	if (!last_inserted) {
+		__queue_raw_event_end(new);
+		return;
+	}
+
+	/*
+	 * Most of the time the current event has a timestamp
+	 * very close to the last event inserted, unless we just switched
+	 * to another event buffer. Having a sorting based on a list and
+	 * on the last inserted event that is close to the current one is
+	 * probably more efficient than an rbtree based sorting.
+	 */
+	if (last_inserted->timestamp >= new->timestamp)
+		__queue_raw_event_before(new, last_inserted);
+	else
+		__queue_raw_event_after(new, last_inserted);
+}
+
+static void queue_raw_event(void *data, int raw_size, int cpu,
+			    u64 timestamp, struct thread *thread)
+{
+	struct raw_event_queue *new;
+
+	if (flush_limit == ULLONG_MAX)
+		flush_limit = timestamp + FLUSH_PERIOD;
+
+	if (timestamp < last_flush) {
+		printf("Warning: Timestamp below last timeslice flush\n");
+		return;
+	}
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		die("Not enough memory\n");
+
+	new->timestamp = timestamp;
+	new->cpu = cpu;
+	new->thread = thread;
+
+	new->data = malloc(raw_size);
+	if (!new->data)
+		die("Not enough memory\n");
+
+	memcpy(new->data, data, raw_size);
+
+	__queue_raw_event(new);
+	last_inserted = new;
+
+	/*
+	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
+	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
+	 * in the first half of the timeslice have all been buffered and there
+	 * are none remaining (we need that because of the weakly ordered
+	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
+	 * timeslice, we flush the first half to be gentle with the memory
+	 * (the second half can still get new events in the middle, so wait
+	 * another period to flush it)
+	 */
+	if (new->timestamp > flush_limit &&
+		new->timestamp - flush_limit > FLUSH_PERIOD) {
+		flush_limit += FLUSH_PERIOD;
+		flush_raw_event_queue(flush_limit);
+	}
+}
+
 static int process_sample_event(event_t *event, struct perf_session *session)
 {
 	struct thread *thread;
@@ -480,7 +624,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
 		return 0;
 
-	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
 
 	return 0;
 }
@@ -576,6 +720,7 @@ static void __cmd_report(void)
 	setup_pager();
 	select_key();
 	read_events();
+	flush_raw_event_queue(ULLONG_MAX);
 	sort_result();
 	print_result();
 }
@@ -608,7 +753,6 @@ static const char *record_args[] = {
 	"record",
 	"-a",
 	"-R",
-	"-M",
 	"-f",
 	"-m", "1024",
 	"-c", "1",

From dd8b1cf681eab40bc5afb67bdd06b2ca341f5669 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 27 Feb 2010 17:10:39 +0100
Subject: [PATCH 633/640] perf: Remove pointless breakpoint union

Remove pointless union in the breakpoint field of hw_perf_event.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_event.h | 5 ++---
 lib/Kconfig.debug          | 8 +++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7b18b4fd5df7..04f06b4be297 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -487,9 +487,8 @@ struct hw_perf_event {
 			struct hrtimer	hrtimer;
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-		union { /* breakpoint */
-			struct arch_hw_breakpoint	info;
-		};
+		/* breakpoint */
+		struct arch_hw_breakpoint	info;
 #endif
 	};
 	atomic64_t			prev_count;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 65f964e7fe78..4dc24cc13f5c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,11 +520,13 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
-	 You can analyze lock events with "perf lock", subcommand of perf.
-	 If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING.
+	 This also enables lock events required by "perf lock",
+	 subcommand of perf.
+	 If you want to use "perf lock", you also need to turn on
+	 CONFIG_EVENT_TRACING.
 
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
- 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"

From 3d083407a16698de86b42aee0da2ffb280b5cb7e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 27 Feb 2010 17:24:15 +0100
Subject: [PATCH 634/640] x86/hw-breakpoints: Remove the name field

Remove the name field from the arch_hw_breakpoint. We never deal
with target symbols in the arch level, neither do we need to ever
store it. It's a legacy for the previous version of the x86
breakpoint backend.

Let's remove it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/hw_breakpoint.h | 1 -
 arch/x86/kernel/hw_breakpoint.c      | 7 -------
 2 files changed, 8 deletions(-)

diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 0675a7c4c20e..2a1bd8f4f23a 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
  * (display/resolving)
  */
 struct arch_hw_breakpoint {
-	char		*name; /* Contains name of the symbol to set bkpt */
 	unsigned long	address;
 	u8		len;
 	u8		type;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index dca2802c666f..41e08dff0161 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -343,13 +343,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
 		return ret;
 	}
 
-	/*
-	 * For kernel-addresses, either the address or symbol name can be
-	 * specified.
-	 */
-	if (info->name)
-		info->address = (unsigned long)
-				kallsyms_lookup_name(info->name);
 	/*
 	 * Check that the low-order bits of the address are appropriate
 	 * for the alignment implied by len.

From 1d6040f17d12a65b9f7ab4cb9fd6d721206b79ec Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 25 Feb 2010 19:40:46 +0100
Subject: [PATCH 635/640] perf, x86: make IBS macros available in perf_event.h

This patch moves code from oprofile to perf_event.h to make it also
available for usage by perf.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h | 10 ++++++++++
 arch/x86/oprofile/op_model_amd.c  | 11 -----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index befd172c82ad..4933ccde96c4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -117,6 +117,16 @@ union cpuid10_edx {
  */
 #define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
 
+/* IbsFetchCtl bits/masks */
+#define IBS_FETCH_RAND_EN		(1ULL<<57)
+#define IBS_FETCH_VAL			(1ULL<<49)
+#define IBS_FETCH_ENABLE		(1ULL<<48)
+#define IBS_FETCH_CNT_MASK		0xFFFF0000ULL
+
+/* IbsOpCtl bits */
+#define IBS_OP_CNT_CTL			(1ULL<<19)
+#define IBS_OP_VAL			(1ULL<<18)
+#define IBS_OP_ENABLE			(1ULL<<17)
 
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 6a58256dce9f..c67174917305 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -46,17 +46,6 @@
 
 static unsigned long reset_value[NUM_VIRT_COUNTERS];
 
-/* IbsFetchCtl bits/masks */
-#define IBS_FETCH_RAND_EN		(1ULL<<57)
-#define IBS_FETCH_VAL			(1ULL<<49)
-#define IBS_FETCH_ENABLE		(1ULL<<48)
-#define IBS_FETCH_CNT_MASK		0xFFFF0000ULL
-
-/* IbsOpCtl bits */
-#define IBS_OP_CNT_CTL			(1ULL<<19)
-#define IBS_OP_VAL			(1ULL<<18)
-#define IBS_OP_ENABLE			(1ULL<<17)
-
 #define IBS_FETCH_SIZE			6
 #define IBS_OP_SIZE			12
 

From a163b1099dc7016704043c7fc572ae42519f08f7 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Thu, 25 Feb 2010 19:43:07 +0100
Subject: [PATCH 636/640] perf, x86: add some IBS macros to perf_event.h

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h | 4 +++-
 arch/x86/oprofile/op_model_amd.c  | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4933ccde96c4..c7f60e1297ab 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -121,12 +121,14 @@ union cpuid10_edx {
 #define IBS_FETCH_RAND_EN		(1ULL<<57)
 #define IBS_FETCH_VAL			(1ULL<<49)
 #define IBS_FETCH_ENABLE		(1ULL<<48)
-#define IBS_FETCH_CNT_MASK		0xFFFF0000ULL
+#define IBS_FETCH_CNT			0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT		0x0000FFFFULL
 
 /* IbsOpCtl bits */
 #define IBS_OP_CNT_CTL			(1ULL<<19)
 #define IBS_OP_VAL			(1ULL<<18)
 #define IBS_OP_ENABLE			(1ULL<<17)
+#define IBS_OP_MAX_CNT			0x0000FFFFULL
 
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index c67174917305..8ddb9fa9c1b2 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -279,7 +279,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
-			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK);
+			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
 			ctl |= IBS_FETCH_ENABLE;
 			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
 		}
@@ -319,7 +319,7 @@ static inline void op_amd_start_ibs(void)
 		return;
 
 	if (ibs_config.fetch_enabled) {
-		val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+		val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
 		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
 		val |= IBS_FETCH_ENABLE;
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
@@ -341,7 +341,7 @@ static inline void op_amd_start_ibs(void)
 			 * avoid underflows.
 			 */
 			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
-					 0xFFFFULL);
+					 IBS_OP_MAX_CNT);
 		}
 		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
 			ibs_op_ctl |= IBS_OP_CNT_CTL;

From bb1165d6882f423f90fc7007a88c6c993b7c2ac4 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 1 Mar 2010 14:21:23 +0100
Subject: [PATCH 637/640] perf, x86: rename macro in
 ARCH_PERFMON_EVENTSEL_ENABLE

For consistency reasons this patch renames
ARCH_PERFMON_EVENTSEL0_ENABLE to ARCH_PERFMON_EVENTSEL_ENABLE.

The following is performed:

 $ sed -i -e s/ARCH_PERFMON_EVENTSEL0_ENABLE/ARCH_PERFMON_EVENTSEL_ENABLE/g \
   arch/x86/include/asm/perf_event.h arch/x86/kernel/cpu/perf_event.c \
   arch/x86/kernel/cpu/perf_event_p6.c \
   arch/x86/kernel/cpu/perfctr-watchdog.c \
   arch/x86/oprofile/op_model_amd.c arch/x86/oprofile/op_model_ppro.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/include/asm/perf_event.h      | 2 +-
 arch/x86/kernel/cpu/perf_event.c       | 8 ++++----
 arch/x86/kernel/cpu/perf_event_p6.c    | 8 ++++----
 arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +-
 arch/x86/oprofile/op_model_amd.c       | 6 +++---
 arch/x86/oprofile/op_model_ppro.c      | 6 +++---
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c7f60e1297ab..80e693684f18 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,7 +18,7 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0			     0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1			     0x187
 
-#define ARCH_PERFMON_EVENTSEL0_ENABLE			  (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_ENABLE			  (1 << 22)
 #define ARCH_PERFMON_EVENTSEL_ANY			  (1 << 21)
 #define ARCH_PERFMON_EVENTSEL_INT			  (1 << 20)
 #define ARCH_PERFMON_EVENTSEL_OS			  (1 << 17)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 641ccb9dddbc..6531b4bdb22d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -553,9 +553,9 @@ static void x86_pmu_disable_all(void)
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 		rdmsrl(x86_pmu.eventsel + idx, val);
-		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
 			continue;
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
@@ -590,7 +590,7 @@ static void x86_pmu_enable_all(void)
 			continue;
 
 		val = event->hw.config;
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
@@ -853,7 +853,7 @@ void hw_perf_enable(void)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	(void)checking_wrmsrl(hwc->config_base + idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
 static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 1ca5ba078afd..a4e67b99d91c 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -62,7 +62,7 @@ static void p6_pmu_disable_all(void)
 
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+	val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 	wrmsrl(MSR_P6_EVNTSEL0, val);
 }
 
@@ -72,7 +72,7 @@ static void p6_pmu_enable_all(void)
 
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
-	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 	wrmsrl(MSR_P6_EVNTSEL0, val);
 }
 
@@ -83,7 +83,7 @@ p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	u64 val = P6_NOP_EVENT;
 
 	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
 	(void)checking_wrmsrl(hwc->config_base + idx, val);
 }
@@ -95,7 +95,7 @@ static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 
 	val = hwc->config;
 	if (cpuc->enabled)
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
 	(void)checking_wrmsrl(hwc->config_base + idx, val);
 }
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 74f4e85a5727..fb329e9f8494 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -680,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
 	cpu_nmi_set_wd_enabled();
 
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8ddb9fa9c1b2..090cbbec7dbd 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -171,7 +171,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 			continue;
 		}
 		rdmsrl(msrs->controls[i].addr, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
 			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
@@ -398,7 +398,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
 		if (!reset_value[op_x86_phys_to_virt(i)])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
 
@@ -418,7 +418,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 		if (!reset_value[op_x86_phys_to_virt(i)])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
 
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 5d1727ba409e..2bf90fafa7b5 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -88,7 +88,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 			continue;
 		}
 		rdmsrl(msrs->controls[i].addr, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
 			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
@@ -166,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			rdmsrl(msrs->controls[i].addr, val);
-			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 			wrmsrl(msrs->controls[i].addr, val);
 		}
 	}
@@ -184,7 +184,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
 		if (!reset_value[i])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
 }

From 320ebf09cbb6d01954c9a060266aa8e0d27f4638 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Mar 2010 12:35:37 +0100
Subject: [PATCH 638/640] perf, x86: Restrict the ANY flag

The ANY flag can show SMT data of another task (like 'top'),
so we want to disable it when system-wide profiling is
disabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |  3 +++
 include/linux/perf_event.h       | 15 +++++++++++++++
 kernel/perf_event.c              | 15 ---------------
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6531b4bdb22d..aab2e1ce9dee 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -503,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
+		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
+		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		return 0;
 	}
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 04f06b4be297..90e0521b1690 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -857,6 +857,21 @@ extern int sysctl_perf_event_paranoid;
 extern int sysctl_perf_event_mlock;
 extern int sysctl_perf_event_sample_rate;
 
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+	return sysctl_perf_event_paranoid > -1;
+}
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_event_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_event_paranoid > 1;
+}
+
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
 extern void perf_bp_event(struct perf_event *event, void *data);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a661e7991865..482d5e1d3764 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly;
  */
 int sysctl_perf_event_paranoid __read_mostly = 1;
 
-static inline bool perf_paranoid_tracepoint_raw(void)
-{
-	return sysctl_perf_event_paranoid > -1;
-}
-
-static inline bool perf_paranoid_cpu(void)
-{
-	return sysctl_perf_event_paranoid > 0;
-}
-
-static inline bool perf_paranoid_kernel(void)
-{
-	return sysctl_perf_event_paranoid > 1;
-}
-
 int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*

From b622d644c7d61a5cb95b74e7b143c263bed21f0a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 1 Feb 2010 15:36:30 +0100
Subject: [PATCH 639/640] perf_events, x86: Fixup fixed counter constraints

Patch 1da53e0230 ("perf_events, x86: Improve x86 event scheduling")
lost us one of the fixed purpose counters and then ed8777fc13
("perf_events, x86: Fix event constraint masks") broke it even
further.

Widen the fixed event mask to event+umask and specify the full config
for each of the 3 fixed purpose counters. Then let the init code fill
out the placement for the GP regs based on the cpuid info.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h      |  2 +-
 arch/x86/kernel/cpu/perf_event.c       | 25 +++++++++++++++------
 arch/x86/kernel/cpu/perf_event_intel.c | 31 +++++++++++++++++---------
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 80e693684f18..db6109a885a7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -50,7 +50,7 @@
 	 INTEL_ARCH_INV_MASK| \
 	 INTEL_ARCH_EDGE_MASK|\
 	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVTSEL_MASK)
+	 INTEL_ARCH_EVENT_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index aab2e1ce9dee..bfc43fa208bc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -73,10 +73,10 @@ struct debug_store {
 struct event_constraint {
 	union {
 		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-		u64		idxmsk64[1];
+		u64		idxmsk64;
 	};
-	int	code;
-	int	cmask;
+	u64	code;
+	u64	cmask;
 	int	weight;
 };
 
@@ -103,7 +103,7 @@ struct cpu_hw_events {
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
-	{ .idxmsk64[0] = (n) },		\
+	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
@@ -116,7 +116,7 @@ struct cpu_hw_events {
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
@@ -615,8 +615,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0; i < n; i++) {
-		constraints[i] =
-		  x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		constraints[i] = c;
 	}
 
 	/*
@@ -1350,6 +1350,7 @@ static void __init pmu_check_apic(void)
 
 void __init init_hw_perf_events(void)
 {
+	struct event_constraint *c;
 	int err;
 
 	pr_info("Performance Events: ");
@@ -1398,6 +1399,16 @@ void __init init_hw_perf_events(void)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
 				   0, x86_pmu.num_events);
 
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+				continue;
+
+			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
+			c->weight += x86_pmu.num_events;
+		}
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cf6590cf4a5f..4fbdfe5708d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,7 +1,7 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
 /*
- * Intel PerfMon v3. Used on Core2 and later.
+ * Intel PerfMon, used on Core and later.
  */
 static const u64 intel_perfmon_event_map[] =
 {
@@ -27,8 +27,14 @@ static struct event_constraint intel_core_event_constraints[] =
 
 static struct event_constraint intel_core2_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/*
+	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
+	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
+	 * ratio between these counters.
+	 */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -37,14 +43,16 @@ static struct event_constraint intel_core2_event_constraints[] =
 	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
 	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
 	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
 	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
 	EVENT_CONSTRAINT_END
 };
 
 static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -58,8 +66,9 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 
 static struct event_constraint intel_westmere_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
 	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -68,8 +77,9 @@ static struct event_constraint intel_westmere_event_constraints[] =
 
 static struct event_constraint intel_gen_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
 
@@ -935,7 +945,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
-	case 28:
+	case 28: /* Atom */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -951,6 +961,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
 		pr_cont("Westmere events, ");
 		break;
+
 	default:
 		/*
 		 * default constraints for v2 and up

From 6630125419ef37ff8781713c5e9d416f2a4ba357 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 2 Mar 2010 15:25:38 -0300
Subject: [PATCH 640/640] perf archive: Don't try to collect files without a
 build-id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid these error:

   [root@doppio ~]# perf archive
   tar: .build-id/00/00000000000000000000000000000000000000: Cannot stat:
   No such file or directory
   tar: .build-id/00/00000000000000000000000000000000000000: Cannot stat:
   No such file or directory
   tar: .build-id/00/00000000000000000000000000000000000000: Cannot stat:
   No such file or directory
   tar: .build-id/00/00000000000000000000000000000000000000: Cannot stat:
   No such file or directory
   tar: Exiting with failure status due to previous errors
   [root@doppio ~]#

More work is needed to support archiving symtabs for binaries
without a build-id, perhaps creating a perf.data UUID + adding
build-ids for the binaries copied into the cache and then have
this perf.data session UUID be a directory with symlinks to the
by now calculated build-id of the files inside it.

Or just do an extra pass and insert the calculated build-ids in
the perf.data header.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/perf-archive.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 45fbe2f07b15..910468e6e01c 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -9,8 +9,9 @@ fi
 
 DEBUGDIR=~/.debug/
 BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+NOBUILDID=0000000000000000000000000000000000000000
 
-perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
 if [ ! -s $BUILDIDS ] ; then
 	echo "perf archive: no build-ids found"
 	rm -f $BUILDIDS