linux/kernel/trace/trace_workqueue.c

/*
 * Workqueue statistical tracer.
 *
 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
 *
 */


#include <trace/workqueue.h>
#include <linux/list.h>
#include "trace_stat.h"
#include "trace.h"


/*
 * Statistics kept for one cpu workqueue thread.
 *
 * Each entry lives on the list of the cpu it was created for and is looked
 * up by the pid of its workqueue thread.
 */
struct cpu_workqueue_stats {
	/* Link in the list of the owning cpu */
	struct list_head	list;
	/*
	 * Set when the entry was added to an empty cpu list.
	 * Useful to know if we print the cpu headers.
	 */
	bool			first_entry;
	/* Cpu whose list this entry is queued on */
	int			cpu;
	/* Pid of the workqueue thread, used as the lookup key */
	pid_t			pid;
	/*
	 * Number of works inserted.
	 * Can be inserted from interrupt or user context, need to be atomic.
	 */
	atomic_t		inserted;
	/*
	 * Number of works executed.
	 * Don't need to be atomic, works are serialized in a single
	 * workqueue thread on a single CPU.
	 */
	unsigned int		executed;
};

/*
 * Per-cpu container: the list of workqueue threads tracked on one cpu,
 * together with the spinlock that protects that list.
 */
struct workqueue_global_stats {
	/* Head of the struct cpu_workqueue_stats entries of this cpu */
	struct list_head	list;
	/* Taken with interrupts disabled around every access to @list */
	spinlock_t		lock;
};

/*
 * Array of per-cpu statistics lists, indexed by cpu number.
 *
 * The pointer itself doesn't need a global lock: it is set once by
 * trace_workqueue_early_init(), which is meant to run before the
 * workqueues are created, and it is never freed. The lists it contains
 * are protected by their own per-cpu spinlock.
 */
static struct workqueue_global_stats *all_workqueue_stat;

/* Insertion of a work */
static void
probe_workqueue_insertion(struct task_struct *wq_thread,
			  struct work_struct *work)
{
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
							list) {
		if (node->pid == wq_thread->pid) {
			atomic_inc(&node->inserted);
			goto found;
		}
	}
	pr_debug("trace_workqueue: entry not found\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/* Execution of a work */
static void
probe_workqueue_execution(struct task_struct *wq_thread,
			  struct work_struct *work)
{
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
							list) {
		if (node->pid == wq_thread->pid) {
			node->executed++;
			goto found;
		}
	}
	pr_debug("trace_workqueue: entry not found\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/*
 * Tracepoint probe: a cpu workqueue thread has been created.
 *
 * Allocates a zeroed statistics entry for @wq_thread, records the thread's
 * command line and appends the entry to the list of @cpu.
 *
 * @wq_thread: the newly created workqueue thread
 * @cpu: index of the per-cpu list the entry is added to
 *
 * Nothing is recorded if @cpu is out of range or if the allocation fails.
 */
static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
{
	struct cpu_workqueue_stats *cws;
	unsigned long flags;

	/*
	 * @cpu is used as an index into all_workqueue_stat[] below: warn
	 * AND bail out on a bogus value instead of touching memory outside
	 * of the array.
	 */
	if (WARN_ON(cpu < 0 || cpu >= num_possible_cpus()))
		return;

	/* Workqueues are sometimes created in atomic context */
	cws = kzalloc(sizeof(*cws), GFP_ATOMIC);
	if (!cws) {
		pr_warning("trace_workqueue: not enough memory\n");
		return;
	}
	tracing_record_cmdline(wq_thread);

	INIT_LIST_HEAD(&cws->list);
	cws->cpu = cpu;

	cws->pid = wq_thread->pid;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	/* Remember whether this entry opens the list of its cpu */
	if (list_empty(&all_workqueue_stat[cpu].list))
		cws->first_entry = true;
	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
}

/* Destruction of a cpu workqueue thread */
static void probe_workqueue_destruction(struct task_struct *wq_thread)
{
	/* Workqueue only execute on one cpu */
	int cpu = cpumask_first(&wq_thread->cpus_allowed);
	struct cpu_workqueue_stats *node, *next;
	unsigned long flags;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
							list) {
		if (node->pid == wq_thread->pid) {
			list_del(&node->list);
			kfree(node);
			goto found;
		}
	}

	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
found:
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

}

static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
{
	unsigned long flags;
	struct cpu_workqueue_stats *ret = NULL;


	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);

	if (!list_empty(&all_workqueue_stat[cpu].list))
		ret = list_entry(all_workqueue_stat[cpu].list.next,
				 struct cpu_workqueue_stats, list);

	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return ret;
}

static void *workqueue_stat_start(void)
{
	int cpu;
	void *ret = NULL;

	for_each_possible_cpu(cpu) {
		ret = workqueue_stat_start_cpu(cpu);
		if (ret)
			return ret;
	}
	return NULL;
}

static void *workqueue_stat_next(void *prev, int idx)
{
	struct cpu_workqueue_stats *prev_cws = prev;
	int cpu = prev_cws->cpu;
	unsigned long flags;
	void *ret = NULL;

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
		for (++cpu ; cpu < num_possible_cpus(); cpu++) {
			ret = workqueue_stat_start_cpu(cpu);
			if (ret)
				return ret;
		}
		return NULL;
	}
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
			  list);
}

static int workqueue_stat_show(struct seq_file *s, void *p)
{
	struct cpu_workqueue_stats *cws = p;
	unsigned long flags;
	int cpu = cws->cpu;

	seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
		   atomic_read(&cws->inserted),
		   cws->executed,
		   trace_find_cmdline(cws->pid));

	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
	if (&cws->list == all_workqueue_stat[cpu].list.next)
		seq_printf(s, "\n");
	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);

	return 0;
}

/*
 * Stat tracer output: print the two header lines (column titles and their
 * markers, followed by an empty line) into @s. Always returns 0.
 */
static int workqueue_stat_headers(struct seq_file *s)
{
	/* Fixed strings without conversions: no formatting is needed */
	seq_puts(s, "# CPU  INSERTED  EXECUTED   NAME\n");
	seq_puts(s, "# |      |         |          |\n\n");

	return 0;
}

/*
 * Description of the "workqueues" stat tracer: its name and the callbacks
 * that print the headers, iterate over the entries and print each of them.
 */
struct tracer_stat workqueue_stats __read_mostly = {
	.name		= "workqueues",
	.stat_headers	= workqueue_stat_headers,
	.stat_start	= workqueue_stat_start,
	.stat_next	= workqueue_stat_next,
	.stat_show	= workqueue_stat_show,
};


/*
 * Register the workqueue stat tracer. Run as an fs_initcall.
 *
 * Returns 0 on success, 1 (after a warning) if the registration failed.
 */
int __init stat_workqueue_init(void)
{
	int err = register_stat_tracer(&workqueue_stats);

	if (!err)
		return 0;

	pr_warning("Unable to register workqueue stat tracer\n");
	return 1;
}
fs_initcall(stat_workqueue_init);

/*
 * Workqueues are created very early, just after pre-smp initcalls.
 * So we must register our tracepoints at this stage.
 *
 * Registers the four workqueue probes (insertion, execution, creation,
 * destruction), then allocates and initializes the per-cpu statistics
 * array. If any step fails, every probe registered so far is unregistered
 * again, so that no probe is left running without its array.
 *
 * Returns 0 on success, 1 (after a warning) on failure.
 */
int __init trace_workqueue_early_init(void)
{
	int ret, cpu;

	ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
	if (ret)
		goto out;

	ret = register_trace_workqueue_execution(probe_workqueue_execution);
	if (ret)
		goto no_insertion;

	ret = register_trace_workqueue_creation(probe_workqueue_creation);
	if (ret)
		goto no_execution;

	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
	if (ret)
		goto no_creation;

	/*
	 * The array is indexed by cpu number, so it needs one slot per
	 * possible cpu *id* (nr_cpu_ids), not one per possible cpu: the
	 * count is smaller than the highest id when the ids are sparse.
	 */
	all_workqueue_stat = kcalloc(nr_cpu_ids, sizeof(*all_workqueue_stat),
				     GFP_KERNEL);

	if (!all_workqueue_stat) {
		pr_warning("trace_workqueue: not enough memory\n");
		goto no_destruction;
	}

	for_each_possible_cpu(cpu) {
		spin_lock_init(&all_workqueue_stat[cpu].lock);
		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
	}

	return 0;

no_destruction:
	/* Without this the destruction probe would stay live with no array */
	unregister_trace_workqueue_destruction(probe_workqueue_destruction);
no_creation:
	unregister_trace_workqueue_creation(probe_workqueue_creation);
no_execution:
	unregister_trace_workqueue_execution(probe_workqueue_execution);
no_insertion:
	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
out:
	pr_warning("trace_workqueue: unable to trace workqueues\n");

	return 1;
}
early_initcall(trace_workqueue_early_init);
tracing: add a new workqueue tracer Impact: new tracer The workqueue tracer provides some statistical informations about each cpu workqueue thread such as the number of the works inserted and executed since their creation. It can help to evaluate the amount of work each of them have to perform. For example it can help a developer to decide whether he should choose a per cpu workqueue instead of a singlethreaded one. It only traces statistical informations for now but it will probably later provide event tracing too. Such a tracer could help too, and be improved, to help rt priority sorted workqueue development. To have a snapshot of the workqueues state at any time, just do cat /debugfs/tracing/trace_stat/workqueues Ie: 1 125 125 reiserfs/1 1 0 0 scsi_tgtd/1 1 0 0 aio/1 1 0 0 ata/1 1 114 114 kblockd/1 1 0 0 kintegrityd/1 1 2147 2147 events/1 0 0 0 kpsmoused 0 105 105 reiserfs/0 0 0 0 scsi_tgtd/0 0 0 0 aio/0 0 0 0 ata_aux 0 0 0 ata/0 0 0 0 cqueue 0 0 0 kacpi_notify 0 0 0 kacpid 0 149 149 kblockd/0 0 0 0 kintegrityd/0 0 1000 1000 khelper 0 2270 2270 events/0 Changes in V2: _ Drop the static array based on NR_CPU and dynamically allocate the stat array with num_possible_cpus() and other cpu mask facilities.... _ Trace workqueue insertion at a bit lower level (insert_work instead of queue_work) to handle even the workqueue barriers. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> 2009-01-12 22:15:46 +00:00			`/*`
			`* Workqueue statistical tracer.`
			`*`
			`* Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>`
			`*`
			`*/`


			`#include <trace/workqueue.h>`
			`#include <linux/list.h>`
			`#include "trace_stat.h"`
			`#include "trace.h"`


			`/* A cpu workqueue thread */`
			`struct cpu_workqueue_stats {`
			`struct list_head list;`
			`/* Useful to know if we print the cpu headers */`
			`bool first_entry;`
			`int cpu;`
			`pid_t pid;`
			`/* Can be inserted from interrupt or user context, need to be atomic */`
			`atomic_t inserted;`
			`/*`
			`* Don't need to be atomic, works are serialized in a single workqueue thread`
			`* on a single CPU.`
			`*/`
			`unsigned int executed;`
			`};`

			`/* List of workqueue threads on one cpu */`
			`struct workqueue_global_stats {`
			`struct list_head list;`
			`spinlock_t lock;`
			`};`

			`/* Don't need a global lock because allocated before the workqueues, and`
			`* never freed.`
			`*/`
			`static struct workqueue_global_stats *all_workqueue_stat;`

			`/* Insertion of a work */`
			`static void`
			`probe_workqueue_insertion(struct task_struct *wq_thread,`
			`struct work_struct *work)`
			`{`
			`int cpu = cpumask_first(&wq_thread->cpus_allowed);`
			`struct cpu_workqueue_stats node, next;`
			`unsigned long flags;`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,`
			`list) {`
			`if (node->pid == wq_thread->pid) {`
			`atomic_inc(&node->inserted);`
			`goto found;`
			`}`
			`}`
			`pr_debug("trace_workqueue: entry not found\n");`
			`found:`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`
			`}`

			`/* Execution of a work */`
			`static void`
			`probe_workqueue_execution(struct task_struct *wq_thread,`
			`struct work_struct *work)`
			`{`
			`int cpu = cpumask_first(&wq_thread->cpus_allowed);`
			`struct cpu_workqueue_stats node, next;`
			`unsigned long flags;`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,`
			`list) {`
			`if (node->pid == wq_thread->pid) {`
			`node->executed++;`
			`goto found;`
			`}`
			`}`
			`pr_debug("trace_workqueue: entry not found\n");`
			`found:`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`
			`}`

			`/* Creation of a cpu workqueue thread */`
			`static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)`
			`{`
			`struct cpu_workqueue_stats *cws;`
			`unsigned long flags;`

			`WARN_ON(cpu < 0 \|\| cpu >= num_possible_cpus());`

			`/* Workqueues are sometimes created in atomic context */`
			`cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);`
			`if (!cws) {`
			`pr_warning("trace_workqueue: not enough memory\n");`
			`return;`
			`}`
			`tracing_record_cmdline(wq_thread);`

			`INIT_LIST_HEAD(&cws->list);`
			`cws->cpu = cpu;`

			`cws->pid = wq_thread->pid;`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`if (list_empty(&all_workqueue_stat[cpu].list))`
			`cws->first_entry = true;`
			`list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`
			`}`

			`/* Destruction of a cpu workqueue thread */`
			`static void probe_workqueue_destruction(struct task_struct *wq_thread)`
			`{`
			`/* Workqueue only execute on one cpu */`
			`int cpu = cpumask_first(&wq_thread->cpus_allowed);`
			`struct cpu_workqueue_stats node, next;`
			`unsigned long flags;`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,`
			`list) {`
			`if (node->pid == wq_thread->pid) {`
			`list_del(&node->list);`
			`kfree(node);`
			`goto found;`
			`}`
			`}`

			`pr_debug("trace_workqueue: don't find workqueue to destroy\n");`
			`found:`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`

			`}`

			`static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)`
			`{`
			`unsigned long flags;`
			`struct cpu_workqueue_stats *ret = NULL;`


			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`

			`if (!list_empty(&all_workqueue_stat[cpu].list))`
			`ret = list_entry(all_workqueue_stat[cpu].list.next,`
			`struct cpu_workqueue_stats, list);`

			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`

			`return ret;`
			`}`

			`static void *workqueue_stat_start(void)`
			`{`
			`int cpu;`
			`void *ret = NULL;`

			`for_each_possible_cpu(cpu) {`
			`ret = workqueue_stat_start_cpu(cpu);`
			`if (ret)`
			`return ret;`
			`}`
			`return NULL;`
			`}`

			`static void workqueue_stat_next(void prev, int idx)`
			`{`
			`struct cpu_workqueue_stats *prev_cws = prev;`
			`int cpu = prev_cws->cpu;`
			`unsigned long flags;`
			`void *ret = NULL;`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`
			`for (++cpu ; cpu < num_possible_cpus(); cpu++) {`
			`ret = workqueue_stat_start_cpu(cpu);`
			`if (ret)`
			`return ret;`
			`}`
			`return NULL;`
			`}`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`

			`return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,`
			`list);`
			`}`

			`static int workqueue_stat_show(struct seq_file s, void p)`
			`{`
			`struct cpu_workqueue_stats *cws = p;`
			`unsigned long flags;`
			`int cpu = cws->cpu;`

			`seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,`
			`atomic_read(&cws->inserted),`
			`cws->executed,`
			`trace_find_cmdline(cws->pid));`

			`spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);`
			`if (&cws->list == all_workqueue_stat[cpu].list.next)`
			`seq_printf(s, "\n");`
			`spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);`

			`return 0;`
			`}`

			`static int workqueue_stat_headers(struct seq_file *s)`
			`{`
			`seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");`
			`seq_printf(s, "# \| \| \| \|\n\n");`
			`return 0;`
			`}`

			`struct tracer_stat workqueue_stats __read_mostly = {`
			`.name = "workqueues",`
			`.stat_start = workqueue_stat_start,`
			`.stat_next = workqueue_stat_next,`
			`.stat_show = workqueue_stat_show,`
			`.stat_headers = workqueue_stat_headers`
			`};`


			`int __init stat_workqueue_init(void)`
			`{`
			`if (register_stat_tracer(&workqueue_stats)) {`
			`pr_warning("Unable to register workqueue stat tracer\n");`
			`return 1;`
			`}`

			`return 0;`
			`}`
			`fs_initcall(stat_workqueue_init);`

			`/*`
			`* Workqueues are created very early, just after pre-smp initcalls.`
			`* So we must register our tracepoints at this stage.`
			`*/`
			`int __init trace_workqueue_early_init(void)`
			`{`
			`int ret, cpu;`

			`ret = register_trace_workqueue_insertion(probe_workqueue_insertion);`
			`if (ret)`
			`goto out;`

			`ret = register_trace_workqueue_execution(probe_workqueue_execution);`
			`if (ret)`
			`goto no_insertion;`

			`ret = register_trace_workqueue_creation(probe_workqueue_creation);`
			`if (ret)`
			`goto no_execution;`

			`ret = register_trace_workqueue_destruction(probe_workqueue_destruction);`
			`if (ret)`
			`goto no_creation;`

			`all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)`
			`* num_possible_cpus(), GFP_KERNEL);`

			`if (!all_workqueue_stat) {`
			`pr_warning("trace_workqueue: not enough memory\n");`
			`goto no_creation;`
			`}`

			`for_each_possible_cpu(cpu) {`
			`spin_lock_init(&all_workqueue_stat[cpu].lock);`
			`INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);`
			`}`

			`return 0;`

			`no_creation:`
			`unregister_trace_workqueue_creation(probe_workqueue_creation);`
			`no_execution:`
			`unregister_trace_workqueue_execution(probe_workqueue_execution);`
			`no_insertion:`
			`unregister_trace_workqueue_insertion(probe_workqueue_insertion);`
			`out:`
			`pr_warning("trace_workqueue: unable to trace workqueues\n");`

			`return 1;`
			`}`
			`early_initcall(trace_workqueue_early_init);`