linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
@ 2009-04-13  0:53 Zhaolei
  2009-04-13  0:54 ` [PATCH 1/1] " Zhaolei
  2009-04-13 13:57 ` [PATCH 0/1] " Frederic Weisbecker
  0 siblings, 2 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-13  0:53 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Tom Zanussi, Ingo Molnar,
	linux-kernel

Hi,

I moved workqueue trace to use TRACE_EVENT macro.

CC: Frederic Weisbecker
I saw in your mail that you were scheduled to do it, but by that time I had
already finished it. (I hadn't sent it to lkml at that time because it needs to
apply on top of my previous patch, which was sent but not yet committed.)
So I just send this patch; you can accept it, or drop it and use yours if this
one is not good.

Thanks
Zhaolei


^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  0:53 [PATCH 0/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
@ 2009-04-13  0:54 ` Zhaolei
  2009-04-13  1:44   ` KOSAKI Motohiro
  2009-04-13 14:13   ` Frederic Weisbecker
  2009-04-13 13:57 ` [PATCH 0/1] " Frederic Weisbecker
  1 sibling, 2 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-13  0:54 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Tom Zanussi, Ingo Molnar
  Cc: linux-kernel

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 include/trace/trace_event_types.h     |    1 +
 include/trace/trace_events.h          |    1 +
 include/trace/workqueue.h             |   19 +------
 include/trace/workqueue_event_types.h |  101 +++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 17 deletions(-)
 create mode 100644 include/trace/workqueue_event_types.h

diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index 552a50e..2babba4 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -5,3 +5,4 @@
 #include <trace/lockdep_event_types.h>
 #include <trace/skb_event_types.h>
 #include <trace/kmem_event_types.h>
+#include <trace/workqueue_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 13d6b85..2e0f58c 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -5,3 +5,4 @@
 #include <trace/lockdep.h>
 #include <trace/skb.h>
 #include <trace/kmem.h>
+#include <trace/workqueue.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
index 7626523..ecb58f4 100644
--- a/include/trace/workqueue.h
+++ b/include/trace/workqueue.h
@@ -1,25 +1,10 @@
 #ifndef __TRACE_WORKQUEUE_H
 #define __TRACE_WORKQUEUE_H
 
-#include <linux/tracepoint.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
+#include <linux/tracepoint.h>
 
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
+#include <trace/workqueue_event_types.h>
 
 #endif /* __TRACE_WORKQUEUE_H */
diff --git a/include/trace/workqueue_event_types.h b/include/trace/workqueue_event_types.h
new file mode 100644
index 0000000..ff65d47
--- /dev/null
+++ b/include/trace/workqueue_event_types.h
@@ -0,0 +1,101 @@
+
+/* use <trace/workqueue.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(	char,			thread_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,			thread_pid			)
+		__field(	struct work_struct *,	work				)
+		__field(	work_func_t,		func				)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
+		__entry->thread_pid, __entry->work, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(	char,			thread_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,			thread_pid			)
+		__field(	struct work_struct *,	work				)
+		__field(	work_func_t,		func				)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
+		__entry->thread_pid, __entry->work, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(	char,	thread_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	thread_pid			)
+		__field(	int,	cpu				)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(	char,	thread_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	thread_pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#undef TRACE_SYSTEM
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  0:54 ` [PATCH 1/1] " Zhaolei
@ 2009-04-13  1:44   ` KOSAKI Motohiro
  2009-04-13  3:25     ` KOSAKI Motohiro
  2009-04-13 14:34     ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
  2009-04-13 14:13   ` Frederic Weisbecker
  1 sibling, 2 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  1:44 UTC (permalink / raw)
  To: Zhaolei
  Cc: kosaki.motohiro, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, Ingo Molnar, linux-kernel

Hi,

hehe, I also have similar patch in my local patch queue ;)

> +TRACE_EVENT(workqueue_insertion,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,			thread_pid			)
> +		__field(	struct work_struct *,	work				)
> +		__field(	work_func_t,		func				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
> +		__entry->thread_pid, __entry->work, __entry->func)
> +);

please don't display raw kernel pointer.

work: unnecessary. it's internal information and func name provide enough
      information.
func: %pF can print pritty format


> +
> +TRACE_EVENT(workqueue_execution,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,			thread_pid			)
> +		__field(	struct work_struct *,	work				)
> +		__field(	work_func_t,		func				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
> +		__entry->thread_pid, __entry->work, __entry->func)
> +);

ditto.

and, I plan to rename this tracepoint to workqueue_handler_entry()
and create workqueue_handler_exit().

It's because a badly-behaved driver can consume a lot of cputime in the
workqueue thread, which delays subsequent work. We need to measure it.


> +
> +/* Trace the creation of one workqueue thread on a cpu */
> +TRACE_EVENT(workqueue_creation,
> +
> +	TP_PROTO(struct task_struct *wq_thread, int cpu),
> +
> +	TP_ARGS(wq_thread, cpu),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,	thread_pid			)
> +		__field(	int,	cpu				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->cpu		= cpu;
> +	),
> +
> +	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
> +		__entry->thread_pid, __entry->cpu)
> +);
> +
> +TRACE_EVENT(workqueue_destruction,
> +
> +	TP_PROTO(struct task_struct *wq_thread),
> +
> +	TP_ARGS(wq_thread),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,	thread_pid			)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +	),
> +
> +	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
> +);
> +
> +#undef TRACE_SYSTEM
> -- 
> 1.5.5.3
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  1:44   ` KOSAKI Motohiro
@ 2009-04-13  3:25     ` KOSAKI Motohiro
  2009-04-13  3:49       ` Zhaolei
  2009-04-13 14:34     ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
  1 sibling, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  3:25 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, Ingo Molnar, linux-kernel

> Hi,
> 
> hehe, I also have similar patch in my local patch queue ;)
> 
> > +TRACE_EVENT(workqueue_insertion,
> > +
> > +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> > +
> > +	TP_ARGS(wq_thread, work),
> > +
> > +	TP_STRUCT__entry(
> > +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> > +		__field(	pid_t,			thread_pid			)
> > +		__field(	struct work_struct *,	work				)
> > +		__field(	work_func_t,		func				)
> > +	),

and, your patch exceed 80 character in many lines.
Please use checkpatch.pl script.




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  3:25     ` KOSAKI Motohiro
@ 2009-04-13  3:49       ` Zhaolei
  2009-04-13  3:57         ` Ingo Molnar
  2009-04-13  3:57         ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints " KOSAKI Motohiro
  0 siblings, 2 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-13  3:49 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Steven Rostedt, Frederic Weisbecker, Tom Zanussi, Ingo Molnar,
	linux-kernel

KOSAKI Motohiro wrote:
>> Hi,
>>
>> hehe, I also have similar patch in my local patch queue ;)
>>
>>> +TRACE_EVENT(workqueue_insertion,
>>> +
>>> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
>>> +
>>> +	TP_ARGS(wq_thread, work),
>>> +
>>> +	TP_STRUCT__entry(
>>> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
>>> +		__field(	pid_t,			thread_pid			)
>>> +		__field(	struct work_struct *,	work				)
>>> +		__field(	work_func_t,		func				)
>>> +	),
> 
> and, your patch exceed 80 character in many lines.
> Please use checkpatch.pl script.
Hello, Kosaki-san

Thanks for your comments for this patch,
I'll merge your and other people's comments and send a V2.

About lines exceeding 80 characters: I did run checkpatch before sending, and I
know that the lines in TP_STRUCT__entry are too long.
It is because I used a format similar to the other TRACE_EVENT definitions.
Actually, I don't know why we need to add the prefix and postfix TABs, or
whether it is necessary to use tabs to align each field.
Can someone explain? Otherwise I will use a different format to fix this 80-char problem.

Thanks
Zhaolei
> 
> 
> 
> 



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  3:49       ` Zhaolei
@ 2009-04-13  3:57         ` Ingo Molnar
  2009-04-13  4:07           ` Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueuetracepoints " Zhaolei
  2009-04-13  3:57         ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints " KOSAKI Motohiro
  1 sibling, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-13  3:57 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, linux-kernel


* Zhaolei <zhaolei@cn.fujitsu.com> wrote:

> It it because I use similar format with other TRACE_EVENT 
> definition. Actually, I don't know why we need to add prefix and 
> postfix TABs, and is it necessary to use tab to make alignment for 
> each fields.

The __array() entry is a bit special - i wouldnt worry much about 
it, please ignore the checkpatch warning in this case - or remove 
the prefix tab.

The alignment of the fields are useful visually: all the field names 
are enumerated in a vertical row and are easy to see at a glance. 
The postfix tabs are nice because they move the non-C-syntax closing 
')' out of the line of sight. The prefix tabs are useful for a 
similar reason.

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  3:49       ` Zhaolei
  2009-04-13  3:57         ` Ingo Molnar
@ 2009-04-13  3:57         ` KOSAKI Motohiro
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
  1 sibling, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  3:57 UTC (permalink / raw)
  To: Zhaolei
  Cc: kosaki.motohiro, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, Ingo Molnar, linux-kernel

Hi

> KOSAKI Motohiro wrote:
> >> Hi,
> >>
> >> hehe, I also have similar patch in my local patch queue ;)
> >>
> >>> +TRACE_EVENT(workqueue_insertion,
> >>> +
> >>> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> >>> +
> >>> +	TP_ARGS(wq_thread, work),
> >>> +
> >>> +	TP_STRUCT__entry(
> >>> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> >>> +		__field(	pid_t,			thread_pid			)
> >>> +		__field(	struct work_struct *,	work				)
> >>> +		__field(	work_func_t,		func				)
> >>> +	),
> > 
> > and, your patch exceed 80 character in many lines.
> > Please use checkpatch.pl script.
> Hello, Kosaki-san
> 
> Thanks for your comments for this patch,
> I'll merge your and other people's comments and send a V2.
> 
> About line exceed 80 characters, I do checkpatch before send, and I know that
> lines in TP_STRUCT__entry are too long.
> It it because I use similar format with other TRACE_EVENT definition.
> Actually, I don't know why we need to add prefix and postfix TABs, and is it
> necessary to use tab to make alignment for each fields.
> Who can give me teach? or I will use different format to fix this 80-char problem.

I'll send my patch today's later. please give me comment :)





^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-13  3:57         ` Ingo Molnar
@ 2009-04-13  4:07           ` Zhaolei
  0 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-13  4:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, linux-kernel

* From: "Ingo Molnar" <mingo@elte.hu>
> 
> * Zhaolei <zhaolei@cn.fujitsu.com> wrote:
> 
>> It it because I use similar format with other TRACE_EVENT 
>> definition. Actually, I don't know why we need to add prefix and 
>> postfix TABs, and is it necessary to use tab to make alignment for 
>> each fields.
> 
> The __array() entry is a bit special - i wouldnt worry much about 
> it, please ignore the checkpatch warning in this case - or remove 
> the prefix tab.
Hello, Ingo 

Thanks for your explanation.

I think removing only the prefix tab (but keeping the postfix one) makes the
code look a bit ugly. I prefer to keep both tabs.
That is to say, I will not modify the tabs in this patch.

Thanks
Zhaolei

> 
> The alignment of the fields are useful visually: all the field names 
> are enumerated in a vertical row and are easy to see at a glance. 
> The postfix tabs are nice because they move the non-C-syntax closing 
> ')' out of the line of sight. The prefix tabs are useful for a 
> similar reason.
> 
> Ingo
> 
> [garbled, mis-encoded mail footer removed]

^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v2 1/4] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  3:57         ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints " KOSAKI Motohiro
@ 2009-04-13  5:52           ` KOSAKI Motohiro
  2009-04-13  5:53             ` [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry KOSAKI Motohiro
                               ` (4 more replies)
  0 siblings, 5 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  5:52 UTC (permalink / raw)
  To: Zhaolei, Steven Rostedt, Frederic Weisbecker, Tom Zanussi,
	Ingo Molnar, linux-kernel
  Cc: kosaki.motohiro

From: Zhao Lei <zhaolei@cn.fujitsu.com>
Subject: [PATCH v2] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

This patch converts DEFINE_TRACE to TRACE_EVENT in the workqueue-related tracepoints.


Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki@jp.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/trace/trace_event_types.h     |    1 
 include/trace/trace_events.h          |    1 
 include/trace/workqueue.h             |   19 ------
 include/trace/workqueue_event_types.h |  101 ++++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 17 deletions(-)
 create mode 100644 include/trace/workqueue_event_types.h

Index: b/include/trace/trace_event_types.h
===================================================================
--- a/include/trace/trace_event_types.h	2009-04-13 12:11:00.000000000 +0900
+++ b/include/trace/trace_event_types.h	2009-04-13 12:18:56.000000000 +0900
@@ -5,3 +5,4 @@
 #include <trace/lockdep_event_types.h>
 #include <trace/skb_event_types.h>
 #include <trace/kmem_event_types.h>
+#include <trace/workqueue_event_types.h>
Index: b/include/trace/trace_events.h
===================================================================
--- a/include/trace/trace_events.h	2009-04-13 12:11:00.000000000 +0900
+++ b/include/trace/trace_events.h	2009-04-13 12:18:56.000000000 +0900
@@ -5,3 +5,4 @@
 #include <trace/lockdep.h>
 #include <trace/skb.h>
 #include <trace/kmem.h>
+#include <trace/workqueue.h>
Index: b/include/trace/workqueue.h
===================================================================
--- a/include/trace/workqueue.h	2009-04-13 12:08:59.000000000 +0900
+++ b/include/trace/workqueue.h	2009-04-13 12:18:56.000000000 +0900
@@ -1,25 +1,10 @@
 #ifndef __TRACE_WORKQUEUE_H
 #define __TRACE_WORKQUEUE_H
 
-#include <linux/tracepoint.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
+#include <linux/tracepoint.h>
 
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
+#include <trace/workqueue_event_types.h>
 
 #endif /* __TRACE_WORKQUEUE_H */
Index: b/include/trace/workqueue_event_types.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/include/trace/workqueue_event_types.h	2009-04-13 12:25:16.000000000 +0900
@@ -0,0 +1,101 @@
+
+/* use <trace/workqueue.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,			thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(struct work_struct *,	work)
+		__field(work_func_t,		func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,			thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(struct work_struct *,	work)
+		__field(work_func_t,		func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#undef TRACE_SYSTEM



^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
@ 2009-04-13  5:53             ` KOSAKI Motohiro
  2009-04-13 16:25               ` Frederic Weisbecker
  2009-04-13  5:53             ` [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer KOSAKI Motohiro
                               ` (3 subsequent siblings)
  4 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  5:53 UTC (permalink / raw)
  To: Zhaolei, Steven Rostedt, Frederic Weisbecker, Tom Zanussi,
	Ingo Molnar, linux-kernel
  Cc: kosaki.motohiro

Subject: [PATCH] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry

An entry/exit handler pair is a useful, common tracepoint technique; it allows
measuring the time consumed by a handler.

So, add a workqueue handler-exit tracepoint as well, and rename the execution
tracepoint to handler-entry.


Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/trace/workqueue_event_types.h |   25 ++++++++++++++++++++++++-
 kernel/trace/trace_workqueue.c        |   10 +++++-----
 kernel/workqueue.c                    |    6 ++++--
 3 files changed, 33 insertions(+), 8 deletions(-)

Index: b/include/trace/workqueue_event_types.h
===================================================================
--- a/include/trace/workqueue_event_types.h	2009-04-13 13:10:27.000000000 +0900
+++ b/include/trace/workqueue_event_types.h	2009-04-13 13:10:27.000000000 +0900
@@ -32,7 +32,7 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_execution,
+TRACE_EVENT(workqueue_handler_entry,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -56,6 +56,29 @@ TRACE_EVENT(workqueue_execution,
 		__entry->thread_pid, __entry->func)
 );
 
+TRACE_EVENT(workqueue_handler_exit,
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,			thread_comm, TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(struct work_struct *,	work)
+		__field(work_func_t,		func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		  __entry->thread_pid, __entry->func)
+);
+
 /* Trace the creation of one workqueue thread on a cpu */
 TRACE_EVENT(workqueue_creation,
 
Index: b/kernel/trace/trace_workqueue.c
===================================================================
--- a/kernel/trace/trace_workqueue.c	2009-04-13 13:10:19.000000000 +0900
+++ b/kernel/trace/trace_workqueue.c	2009-04-13 13:12:36.000000000 +0900
@@ -65,7 +65,7 @@ found:
 
 /* Execution of a work */
 static void
-probe_workqueue_execution(struct task_struct *wq_thread,
+probe_workqueue_entry(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -255,13 +255,13 @@ int __init trace_workqueue_early_init(vo
 	if (ret)
 		goto out;
 
-	ret = register_trace_workqueue_execution(probe_workqueue_execution);
+	ret = register_trace_workqueue_handler_entry(probe_workqueue_entry);
 	if (ret)
 		goto no_insertion;
 
 	ret = register_trace_workqueue_creation(probe_workqueue_creation);
 	if (ret)
-		goto no_execution;
+		goto no_handler_entry;
 
 	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
 	if (ret)
@@ -276,8 +276,8 @@ int __init trace_workqueue_early_init(vo
 
 no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
-no_execution:
-	unregister_trace_workqueue_execution(probe_workqueue_execution);
+no_handler_entry:
+	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
 no_insertion:
 	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
 out:
Index: b/kernel/workqueue.c
===================================================================
--- a/kernel/workqueue.c	2009-04-13 13:06:36.000000000 +0900
+++ b/kernel/workqueue.c	2009-04-13 13:14:15.000000000 +0900
@@ -262,7 +262,8 @@ int queue_delayed_work_on(int cpu, struc
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
+DEFINE_TRACE(workqueue_handler_entry);
+DEFINE_TRACE(workqueue_handler_exit);
 
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
@@ -282,7 +283,6 @@ static void run_workqueue(struct cpu_wor
 		 */
 		struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
-		trace_workqueue_execution(cwq->thread, work);
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irq(&cwq->lock);
@@ -291,7 +291,9 @@ static void run_workqueue(struct cpu_wor
 		work_clear_pending(work);
 		lock_map_acquire(&cwq->wq->lockdep_map);
 		lock_map_acquire(&lockdep_map);
+		trace_workqueue_handler_entry(cwq->thread, work);
 		f(work);
+		trace_workqueue_handler_exit(cwq->thread, work);
 		lock_map_release(&lockdep_map);
 		lock_map_release(&cwq->wq->lockdep_map);
 



^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
  2009-04-13  5:53             ` [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry KOSAKI Motohiro
@ 2009-04-13  5:53             ` KOSAKI Motohiro
  2009-04-13 16:16               ` Frederic Weisbecker
  2009-04-13  5:55             ` [PATCH v2 4/4] ftrace: add latecy mesurement feature " KOSAKI Motohiro
                               ` (2 subsequent siblings)
  4 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  5:53 UTC (permalink / raw)
  To: Zhaolei, Steven Rostedt, Frederic Weisbecker, Tom Zanussi,
	Ingo Molnar, linux-kernel
  Cc: kosaki.motohiro

Subject: [PATCH] ftrace: add max execution time mesurement to workqueue tracer

A too-slow workqueue handler often causes trouble for the system.
Administrators therefore want to know the max handler execution time.


Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_workqueue.c |   48 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

Index: b/kernel/trace/trace_workqueue.c
===================================================================
--- a/kernel/trace/trace_workqueue.c	2009-04-13 13:23:59.000000000 +0900
+++ b/kernel/trace/trace_workqueue.c	2009-04-13 13:59:51.000000000 +0900
@@ -27,6 +27,10 @@ struct cpu_workqueue_stats {
  *  on a single CPU.
  */
 	unsigned int		    executed;
+
+	/* store handler execution time */
+	u64			    handler_start_time;
+	u64			    max_executed_time;
 };
 
 /* List of workqueue threads on one cpu */
@@ -77,6 +81,7 @@ probe_workqueue_entry(struct task_struct
 							list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
+			node->handler_start_time = trace_clock_global();
 			goto found;
 		}
 	}
@@ -85,6 +90,29 @@ found:
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
+static void probe_workqueue_exit(struct task_struct *wq_thread,
+				 struct work_struct *work)
+{
+	int cpu = cpumask_first(&wq_thread->cpus_allowed);
+	struct cpu_workqueue_stats *node, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
+							list) {
+		if (node->pid == wq_thread->pid) {
+			u64 start = node->handler_start_time;
+			u64 executed_time = trace_clock_global() - start;
+
+			if (node->max_executed_time < executed_time)
+				node->max_executed_time = executed_time;
+			goto found;
+		}
+	}
+found:
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+}
+
 /* Creation of a cpu workqueue thread */
 static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 {
@@ -195,6 +223,9 @@ static int workqueue_stat_show(struct se
 	int cpu = cws->cpu;
 	struct pid *pid;
 	struct task_struct *tsk;
+	unsigned long long exec_time = ns2usecs(cws->max_executed_time);
+	unsigned long exec_usec_rem = do_div(exec_time, USEC_PER_SEC);
+	unsigned long exec_secs = (unsigned long)exec_time;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
@@ -205,8 +236,11 @@ static int workqueue_stat_show(struct se
 	if (pid) {
 		tsk = get_pid_task(pid, PIDTYPE_PID);
 		if (tsk) {
-			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
+			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
+				   "  %s\n",
+				   cws->cpu,
 				   atomic_read(&cws->inserted), cws->executed,
+				   exec_secs, exec_usec_rem,
 				   tsk->comm);
 			put_task_struct(tsk);
 		}
@@ -218,8 +252,8 @@ static int workqueue_stat_show(struct se
 
 static int workqueue_stat_headers(struct seq_file *s)
 {
-	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
-	seq_printf(s, "# |      |         |          |\n");
+	seq_printf(s, "# CPU  INSERTED  EXECUTED     MAX_TIME  NAME\n");
+	seq_printf(s, "# |      |         |           |         |\n");
 	return 0;
 }
 
@@ -259,10 +293,14 @@ int __init trace_workqueue_early_init(vo
 	if (ret)
 		goto no_insertion;
 
-	ret = register_trace_workqueue_creation(probe_workqueue_creation);
+	ret = register_trace_workqueue_handler_exit(probe_workqueue_exit);
 	if (ret)
 		goto no_handler_entry;
 
+	ret = register_trace_workqueue_creation(probe_workqueue_creation);
+	if (ret)
+		goto no_handler_exit;
+
 	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
 	if (ret)
 		goto no_creation;
@@ -276,6 +314,8 @@ int __init trace_workqueue_early_init(vo
 
 no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
+no_handler_exit:
+	unregister_trace_workqueue_handler_exit(probe_workqueue_exit);
 no_handler_entry:
 	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
 no_insertion:



^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v2 4/4] ftrace: add latecy mesurement feature to workqueue tracer
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
  2009-04-13  5:53             ` [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry KOSAKI Motohiro
  2009-04-13  5:53             ` [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer KOSAKI Motohiro
@ 2009-04-13  5:55             ` KOSAKI Motohiro
  2009-04-14  2:50               ` KOSAKI Motohiro
  2009-04-13 15:24             ` [PATCH v2 1/4] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
  2009-04-14  4:03             ` [PATCH v2 5/4] ftrace, workqueuetrace: display work name KOSAKI Motohiro
  4 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-13  5:55 UTC (permalink / raw)
  To: Zhaolei, Steven Rostedt, Frederic Weisbecker, Tom Zanussi,
	Ingo Molnar, linux-kernel
  Cc: kosaki.motohiro

Subject: [PATCH] ftrace: add latecy mesurement feature to workqueue tracer

The schedule_delayed_work() doesn't guarantee that the delay argument is honored
exactly, of course. But administrators often want to know how long the work was
actually delayed.

Therefore, this patch adds a latency measurement feature to the workqueue tracer.


<example output>

# cd /sys/kernel/debug/tracing/trace_stat
# cat workqueues

---------------------------------------------------------
# CPU  INSERTED  EXECUTED     MAX_TIME    MAX_LAT  NAME
# |      |         |           |             |      |

  0    326        326         0.000115      2.421  events/0
  0      0          0         0.000000      0.000  cpuset
  0    597        597         0.000387      0.028  khelper
  0      5          5         0.000029      0.002  kblockd/0
  0      0          0         0.000000      0.000  kacpid
  0      0          0         0.000000      0.000  kacpi_notify
  0      0          0         0.000000      0.000  ksuspend_usbd
  0      0          0         0.000000      0.000  aio/0
  0      0          0         0.000000      0.000  crypto/0
  0      0          0         0.000000      0.000  usbhid_resumer
  0    299        299         0.004047      0.001  ata/0
  0      0          0         0.000000      0.000  ata_aux


BTW, I'm not sure why the events/ workqueue thread always has such a long latency.


Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/workqueue.h             |    3 +++
 include/trace/workqueue_event_types.h |   10 ++++++++--
 kernel/trace/trace_workqueue.c        |   19 ++++++++++++++++---
 kernel/workqueue.c                    |   18 +++++++++++++++++-
 4 files changed, 44 insertions(+), 6 deletions(-)

Index: b/include/trace/workqueue_event_types.h
===================================================================
--- a/include/trace/workqueue_event_types.h	2009-04-13 13:58:21.000000000 +0900
+++ b/include/trace/workqueue_event_types.h	2009-04-13 14:07:14.000000000 +0900
@@ -43,6 +43,8 @@ TRACE_EVENT(workqueue_handler_entry,
 		__field(pid_t,			thread_pid)
 		__field(struct work_struct *,	work)
 		__field(work_func_t,		func)
+		__field(unsigned long,		expected_time)
+		__field(unsigned long,		actual_time)
 	),
 
 	TP_fast_assign(
@@ -50,10 +52,14 @@ TRACE_EVENT(workqueue_handler_entry,
 		__entry->thread_pid	= wq_thread->pid;
 		__entry->work		= work;
 		__entry->func		= work->func;
+		__entry->expected_time	= work->expected_time;
+		__entry->actual_time	= jiffies;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_printk("thread=%s:%d latency=%lums func=%pF", __entry->thread_comm,
+		  __entry->thread_pid,
+		  jiffies_to_msecs(__entry->actual_time-__entry->expected_time),
+		  __entry->func)
 );
 
 TRACE_EVENT(workqueue_handler_exit,
Index: b/kernel/trace/trace_workqueue.c
===================================================================
--- a/kernel/trace/trace_workqueue.c	2009-04-13 13:59:51.000000000 +0900
+++ b/kernel/trace/trace_workqueue.c	2009-04-13 14:09:54.000000000 +0900
@@ -31,6 +31,9 @@ struct cpu_workqueue_stats {
 	/* store handler execution time */
 	u64			    handler_start_time;
 	u64			    max_executed_time;
+
+	/* store actual fired jiffies - caller expected jiffies */
+	unsigned long		    max_work_latency;
 };
 
 /* List of workqueue threads on one cpu */
@@ -80,8 +83,13 @@ probe_workqueue_entry(struct task_struct
 	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
+			unsigned long latency;
+
 			node->executed++;
 			node->handler_start_time = trace_clock_global();
+			latency = jiffies - work->expected_time;
+			if (node->max_work_latency < latency)
+				node->max_work_latency = latency;
 			goto found;
 		}
 	}
@@ -226,6 +234,8 @@ static int workqueue_stat_show(struct se
 	unsigned long long exec_time = ns2usecs(cws->max_executed_time);
 	unsigned long exec_usec_rem = do_div(exec_time, USEC_PER_SEC);
 	unsigned long exec_secs = (unsigned long)exec_time;
+	unsigned long lat = jiffies_to_msecs(cws->max_work_latency);
+	unsigned long lat_msec_rem = do_div(lat, MSEC_PER_SEC);
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
@@ -237,10 +247,11 @@ static int workqueue_stat_show(struct se
 		tsk = get_pid_task(pid, PIDTYPE_PID);
 		if (tsk) {
 			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
-				   "  %s\n",
+				   "  %5lu.%03lu  %s\n",
 				   cws->cpu,
 				   atomic_read(&cws->inserted), cws->executed,
 				   exec_secs, exec_usec_rem,
+				   lat, lat_msec_rem,
 				   tsk->comm);
 			put_task_struct(tsk);
 		}
@@ -252,8 +263,10 @@ static int workqueue_stat_show(struct se
 
 static int workqueue_stat_headers(struct seq_file *s)
 {
-	seq_printf(s, "# CPU  INSERTED  EXECUTED     MAX_TIME  NAME\n");
-	seq_printf(s, "# |      |         |           |         |\n");
+	seq_printf(s, "# CPU  INSERTED  EXECUTED     MAX_TIME"
+		   "    MAX_LAT  NAME\n");
+	seq_printf(s, "# |      |         |           |      "
+		   "     |        |\n");
 	return 0;
 }
 
Index: b/kernel/workqueue.c
===================================================================
--- a/kernel/workqueue.c	2009-04-13 13:58:21.000000000 +0900
+++ b/kernel/workqueue.c	2009-04-13 14:07:14.000000000 +0900
@@ -82,6 +82,20 @@ static const struct cpumask *cpu_singlet
  */
 static cpumask_var_t cpu_populated_map __read_mostly;
 
+#ifdef CONFIG_WORKQUEUE_TRACER
+static void set_expected_time(struct work_struct *work,
+			      unsigned long expected_jiffies)
+{
+	work->expected_time = expected_jiffies;
+}
+#else
+static void set_expected_time(struct work_struct *work,
+			      unsigned long expected_jiffies)
+{
+}
+#endif
+
+
 /* If it's single threaded, it isn't in the list of workqueues. */
 static inline int is_wq_single_threaded(struct workqueue_struct *wq)
 {
@@ -190,6 +204,7 @@ queue_work_on(int cpu, struct workqueue_
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(!list_empty(&work->entry));
+		set_expected_time(work, jiffies);
 		__queue_work(wq_per_cpu(wq, cpu), work);
 		ret = 1;
 	}
@@ -251,6 +266,7 @@ int queue_delayed_work_on(int cpu, struc
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
+		set_expected_time(&dwork->work, timer->expires);
 
 		if (unlikely(cpu >= 0))
 			add_timer_on(timer, cpu);
@@ -359,8 +375,8 @@ static void insert_wq_barrier(struct cpu
 {
 	INIT_WORK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
-
 	init_completion(&barr->done);
+	set_expected_time(&barr->work, jiffies);
 
 	insert_work(cwq, &barr->work, head);
 }
Index: b/include/linux/workqueue.h
===================================================================
--- a/include/linux/workqueue.h	2009-04-13 12:04:48.000000000 +0900
+++ b/include/linux/workqueue.h	2009-04-13 14:11:06.000000000 +0900
@@ -32,6 +32,9 @@ struct work_struct {
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
+#ifdef CONFIG_WORKQUEUE_TRACER
+	unsigned long expected_time;
+#endif
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  0:53 [PATCH 0/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
  2009-04-13  0:54 ` [PATCH 1/1] " Zhaolei
@ 2009-04-13 13:57 ` Frederic Weisbecker
  1 sibling, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 13:57 UTC (permalink / raw)
  To: Zhaolei; +Cc: Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel

On Mon, Apr 13, 2009 at 08:53:20AM +0800, Zhaolei wrote:
> Hi,
> 
> I moved workqueue trace to use TRACE_EVENT macro.
> 
> CC: Frederic Weisbecker
> I see in your mail that you will scheduled to do it, but that time I already
> finished it. (I haven't send it to ltml that time because it need apply on top
> of my previous patch which was send but not commited)


This is fine, I have not yet worked on it anyway, I only planned to do it today.
I'm about to review your patches.

Thanks!


> So I just send this patch, you can accept it, or drop it and use yours if is
> is not good.
> 
> Thanks
> Zhaolei
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  0:54 ` [PATCH 1/1] " Zhaolei
  2009-04-13  1:44   ` KOSAKI Motohiro
@ 2009-04-13 14:13   ` Frederic Weisbecker
  2009-04-15  6:47     ` KOSAKI Motohiro
  1 sibling, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 14:13 UTC (permalink / raw)
  To: Zhaolei; +Cc: Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel

On Mon, Apr 13, 2009 at 08:54:35AM +0800, Zhaolei wrote:
> TRACE_EVENT is a more generic way to define tracepoints.
> Doing so adds these new capabilities to this tracepoint:
> 
>   - zero-copy and per-cpu splice() tracing
>   - binary tracing without printf overhead
>   - structured logging records exposed under /debug/tracing/events
>   - trace events embedded in function tracer output and other plugins
>   - user-defined, per tracepoint filter expressions
> 
> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> ---
>  include/trace/trace_event_types.h     |    1 +
>  include/trace/trace_events.h          |    1 +
>  include/trace/workqueue.h             |   19 +------
>  include/trace/workqueue_event_types.h |  101 +++++++++++++++++++++++++++++++++
>  4 files changed, 105 insertions(+), 17 deletions(-)
>  create mode 100644 include/trace/workqueue_event_types.h
> 
> diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
> index 552a50e..2babba4 100644
> --- a/include/trace/trace_event_types.h
> +++ b/include/trace/trace_event_types.h
> @@ -5,3 +5,4 @@
>  #include <trace/lockdep_event_types.h>
>  #include <trace/skb_event_types.h>
>  #include <trace/kmem_event_types.h>
> +#include <trace/workqueue_event_types.h>
> diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
> index 13d6b85..2e0f58c 100644
> --- a/include/trace/trace_events.h
> +++ b/include/trace/trace_events.h
> @@ -5,3 +5,4 @@
>  #include <trace/lockdep.h>
>  #include <trace/skb.h>
>  #include <trace/kmem.h>
> +#include <trace/workqueue.h>
> diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
> index 7626523..ecb58f4 100644
> --- a/include/trace/workqueue.h
> +++ b/include/trace/workqueue.h
> @@ -1,25 +1,10 @@
>  #ifndef __TRACE_WORKQUEUE_H
>  #define __TRACE_WORKQUEUE_H
>  
> -#include <linux/tracepoint.h>
>  #include <linux/workqueue.h>
>  #include <linux/sched.h>
> +#include <linux/tracepoint.h>
>  
> -DECLARE_TRACE(workqueue_insertion,
> -	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> -	   TP_ARGS(wq_thread, work));
> -
> -DECLARE_TRACE(workqueue_execution,
> -	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> -	   TP_ARGS(wq_thread, work));
> -
> -/* Trace the creation of one workqueue thread on a cpu */
> -DECLARE_TRACE(workqueue_creation,
> -	   TP_PROTO(struct task_struct *wq_thread, int cpu),
> -	   TP_ARGS(wq_thread, cpu));
> -
> -DECLARE_TRACE(workqueue_destruction,
> -	   TP_PROTO(struct task_struct *wq_thread),
> -	   TP_ARGS(wq_thread));
> +#include <trace/workqueue_event_types.h>
>  
>  #endif /* __TRACE_WORKQUEUE_H */
> diff --git a/include/trace/workqueue_event_types.h b/include/trace/workqueue_event_types.h
> new file mode 100644
> index 0000000..ff65d47
> --- /dev/null
> +++ b/include/trace/workqueue_event_types.h
> @@ -0,0 +1,101 @@
> +
> +/* use <trace/workqueue.h> instead */
> +#ifndef TRACE_EVENT
> +# error Do not include this file directly.
> +# error Unless you know what you are doing.
> +#endif
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM workqueue
> +
> +TRACE_EVENT(workqueue_insertion,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,			thread_pid			)
> +		__field(	struct work_struct *,	work				)
> +		__field(	work_func_t,		func				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);



This is not about your patch but I see this pattern happen often. We should
start think about a way to use tracing_record_cmdline for that.



> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,



These pointer addresses are not very useful to display.
We can zap work and display the func as a %pF.

Not related to your patch: only the name of the func is useful,
not its asm offset. A format for raw func name without offset would
start to become very handy. I remember Andrew talked about that by the past.

I wonder which format we could use for that. %pf ?



> +		__entry->thread_pid, __entry->work, __entry->func)
> +);
> +
> +TRACE_EVENT(workqueue_execution,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,			thread_pid			)
> +		__field(	struct work_struct *,	work				)
> +		__field(	work_func_t,		func				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,


ditto.


> +		__entry->thread_pid, __entry->work, __entry->func)
> +);
> +
> +/* Trace the creation of one workqueue thread on a cpu */
> +TRACE_EVENT(workqueue_creation,
> +
> +	TP_PROTO(struct task_struct *wq_thread, int cpu),
> +
> +	TP_ARGS(wq_thread, cpu),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,	thread_pid			)
> +		__field(	int,	cpu				)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->cpu		= cpu;
> +	),
> +
> +	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
> +		__entry->thread_pid, __entry->cpu)
> +);
> +
> +TRACE_EVENT(workqueue_destruction,
> +
> +	TP_PROTO(struct task_struct *wq_thread),
> +
> +	TP_ARGS(wq_thread),
> +
> +	TP_STRUCT__entry(
> +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> +		__field(	pid_t,	thread_pid			)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +	),
> +
> +	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
> +);
> +
> +#undef TRACE_SYSTEM


But the rest looks fine.

Thanks!
Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  1:44   ` KOSAKI Motohiro
  2009-04-13  3:25     ` KOSAKI Motohiro
@ 2009-04-13 14:34     ` Frederic Weisbecker
  1 sibling, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 14:34 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel

On Mon, Apr 13, 2009 at 10:44:27AM +0900, KOSAKI Motohiro wrote:
> Hi,
> 
> hehe, I also have similar patch in my local patch queue ;)
> 
> > +TRACE_EVENT(workqueue_insertion,
> > +
> > +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> > +
> > +	TP_ARGS(wq_thread, work),
> > +
> > +	TP_STRUCT__entry(
> > +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> > +		__field(	pid_t,			thread_pid			)
> > +		__field(	struct work_struct *,	work				)
> > +		__field(	work_func_t,		func				)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> > +		__entry->thread_pid	= wq_thread->pid;
> > +		__entry->work		= work;
> > +		__entry->func		= work->func;
> > +	),
> > +
> > +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
> > +		__entry->thread_pid, __entry->work, __entry->func)
> > +);
> 
> please don't display raw kernel pointer.
> 
> work: unnecessary. it's internal information and func name provide enough
>       information.
> func: %pF can print pritty format



Ah, you already commented out it, sorry, I haven't seen you answer :)
(Reminder for me: always check the patches with threaded view).



> 
> 
> > +
> > +TRACE_EVENT(workqueue_execution,
> > +
> > +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> > +
> > +	TP_ARGS(wq_thread, work),
> > +
> > +	TP_STRUCT__entry(
> > +		__array(	char,			thread_comm,	TASK_COMM_LEN	)
> > +		__field(	pid_t,			thread_pid			)
> > +		__field(	struct work_struct *,	work				)
> > +		__field(	work_func_t,		func				)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> > +		__entry->thread_pid	= wq_thread->pid;
> > +		__entry->work		= work;
> > +		__entry->func		= work->func;
> > +	),
> > +
> > +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
> > +		__entry->thread_pid, __entry->work, __entry->func)
> > +);
> 
> ditto.
> 
> and, I plan to rename this tracepoint to workqueue_handler_entry()
> and create workqueue_handler_exit().
>
> It's because stupid driver comsume many cputime in workqueue thread,
> it make delay to after work. We need mesure it.



Nice idea!

Frederic.

 
> 
> > +
> > +/* Trace the creation of one workqueue thread on a cpu */
> > +TRACE_EVENT(workqueue_creation,
> > +
> > +	TP_PROTO(struct task_struct *wq_thread, int cpu),
> > +
> > +	TP_ARGS(wq_thread, cpu),
> > +
> > +	TP_STRUCT__entry(
> > +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> > +		__field(	pid_t,	thread_pid			)
> > +		__field(	int,	cpu				)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> > +		__entry->thread_pid	= wq_thread->pid;
> > +		__entry->cpu		= cpu;
> > +	),
> > +
> > +	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
> > +		__entry->thread_pid, __entry->cpu)
> > +);
> > +
> > +TRACE_EVENT(workqueue_destruction,
> > +
> > +	TP_PROTO(struct task_struct *wq_thread),
> > +
> > +	TP_ARGS(wq_thread),
> > +
> > +	TP_STRUCT__entry(
> > +		__array(	char,	thread_comm,	TASK_COMM_LEN	)
> > +		__field(	pid_t,	thread_pid			)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> > +		__entry->thread_pid	= wq_thread->pid;
> > +	),
> > +
> > +	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
> > +);
> > +
> > +#undef TRACE_SYSTEM
> > -- 
> > 1.5.5.3
> > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> 
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 1/4] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
                               ` (2 preceding siblings ...)
  2009-04-13  5:55             ` [PATCH v2 4/4] ftrace: add latecy mesurement feature " KOSAKI Motohiro
@ 2009-04-13 15:24             ` Frederic Weisbecker
  2009-04-14  4:03             ` [PATCH v2 5/4] ftrace, workqueuetrace: display work name KOSAKI Motohiro
  4 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 15:24 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel

On Mon, Apr 13, 2009 at 02:52:05PM +0900, KOSAKI Motohiro wrote:
> From: Zhao Lei <zhaolei@cn.fujitsu.com>
> Subject: [PATCH v2] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
> 
> TRACE_EVENT is a more generic way to define tracepoints.
> Doing so adds these new capabilities to this tracepoint:
> 
>   - zero-copy and per-cpu splice() tracing
>   - binary tracing without printf overhead
>   - structured logging records exposed under /debug/tracing/events
>   - trace events embedded in function tracer output and other plugins
>   - user-defined, per tracepoint filter expressions
> 
> Then, this patch convert DEFINE_TRACE to TRACE_EVENT in workqueue related tracepoint.
> 


Kosaki-san, you might also want to credit your changes in this patch.
Something like:

[v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and
zap the work addr]


 
> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> Signed-off-by: KOSAKI Motohiro <kosaki@jp.fujitsu.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Tom Zanussi <tzanussi@gmail.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---
>  include/trace/trace_event_types.h     |    1 
>  include/trace/trace_events.h          |    1 
>  include/trace/workqueue.h             |   19 ------
>  include/trace/workqueue_event_types.h |  101 ++++++++++++++++++++++++++++++++++
>  4 files changed, 105 insertions(+), 17 deletions(-)
>  create mode 100644 include/trace/workqueue_event_types.h
> 
> Index: b/include/trace/trace_event_types.h
> ===================================================================
> --- a/include/trace/trace_event_types.h	2009-04-13 12:11:00.000000000 +0900
> +++ b/include/trace/trace_event_types.h	2009-04-13 12:18:56.000000000 +0900
> @@ -5,3 +5,4 @@
>  #include <trace/lockdep_event_types.h>
>  #include <trace/skb_event_types.h>
>  #include <trace/kmem_event_types.h>
> +#include <trace/workqueue_event_types.h>
> Index: b/include/trace/trace_events.h
> ===================================================================
> --- a/include/trace/trace_events.h	2009-04-13 12:11:00.000000000 +0900
> +++ b/include/trace/trace_events.h	2009-04-13 12:18:56.000000000 +0900
> @@ -5,3 +5,4 @@
>  #include <trace/lockdep.h>
>  #include <trace/skb.h>
>  #include <trace/kmem.h>
> +#include <trace/workqueue.h>
> Index: b/include/trace/workqueue.h
> ===================================================================
> --- a/include/trace/workqueue.h	2009-04-13 12:08:59.000000000 +0900
> +++ b/include/trace/workqueue.h	2009-04-13 12:18:56.000000000 +0900
> @@ -1,25 +1,10 @@
>  #ifndef __TRACE_WORKQUEUE_H
>  #define __TRACE_WORKQUEUE_H
>  
> -#include <linux/tracepoint.h>
>  #include <linux/workqueue.h>
>  #include <linux/sched.h>
> +#include <linux/tracepoint.h>
>  
> -DECLARE_TRACE(workqueue_insertion,
> -	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> -	   TP_ARGS(wq_thread, work));
> -
> -DECLARE_TRACE(workqueue_execution,
> -	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> -	   TP_ARGS(wq_thread, work));
> -
> -/* Trace the creation of one workqueue thread on a cpu */
> -DECLARE_TRACE(workqueue_creation,
> -	   TP_PROTO(struct task_struct *wq_thread, int cpu),
> -	   TP_ARGS(wq_thread, cpu));
> -
> -DECLARE_TRACE(workqueue_destruction,
> -	   TP_PROTO(struct task_struct *wq_thread),
> -	   TP_ARGS(wq_thread));
> +#include <trace/workqueue_event_types.h>
>  
>  #endif /* __TRACE_WORKQUEUE_H */
> Index: b/include/trace/workqueue_event_types.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ b/include/trace/workqueue_event_types.h	2009-04-13 12:25:16.000000000 +0900
> @@ -0,0 +1,101 @@
> +
> +/* use <trace/workqueue.h> instead */
> +#ifndef TRACE_EVENT
> +# error Do not include this file directly.
> +# error Unless you know what you are doing.
> +#endif
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM workqueue
> +
> +TRACE_EVENT(workqueue_insertion,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(char,			thread_comm,	TASK_COMM_LEN)
> +		__field(pid_t,			thread_pid)
> +		__field(struct work_struct *,	work)
> +		__field(work_func_t,		func)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> +		__entry->thread_pid, __entry->func)
> +);
> +
> +TRACE_EVENT(workqueue_execution,
> +
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(char,			thread_comm,	TASK_COMM_LEN)
> +		__field(pid_t,			thread_pid)
> +		__field(struct work_struct *,	work)
> +		__field(work_func_t,		func)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> +		__entry->thread_pid, __entry->func)
> +);
> +
> +/* Trace the creation of one workqueue thread on a cpu */
> +TRACE_EVENT(workqueue_creation,
> +
> +	TP_PROTO(struct task_struct *wq_thread, int cpu),
> +
> +	TP_ARGS(wq_thread, cpu),
> +
> +	TP_STRUCT__entry(
> +		__array(char,	thread_comm,	TASK_COMM_LEN)
> +		__field(pid_t,	thread_pid)
> +		__field(int,	cpu)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->cpu		= cpu;
> +	),
> +
> +	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
> +		__entry->thread_pid, __entry->cpu)
> +);
> +
> +TRACE_EVENT(workqueue_destruction,
> +
> +	TP_PROTO(struct task_struct *wq_thread),
> +
> +	TP_ARGS(wq_thread),
> +
> +	TP_STRUCT__entry(
> +		__array(char,	thread_comm,	TASK_COMM_LEN)
> +		__field(pid_t,	thread_pid)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +	),
> +
> +	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
> +);
> +
> +#undef TRACE_SYSTEM
> 
> 


Thank you both!

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-13  5:53             ` [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer KOSAKI Motohiro
@ 2009-04-13 16:16               ` Frederic Weisbecker
  2009-04-13 21:21                 ` Ingo Molnar
  2009-04-14  1:43                 ` KOSAKI Motohiro
  0 siblings, 2 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 16:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel

On Mon, Apr 13, 2009 at 02:53:45PM +0900, KOSAKI Motohiro wrote:
> Subject: [PATCH] ftrace: add max execution time mesurement to workqueue tracer
> 
> Too slow workqueue handler often introduce some trouble to system.
> Then, administrator want to know may handler execution time.
> 
> 
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: Zhaolei <zhaolei@cn.fujitsu.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Tom Zanussi <tzanussi@gmail.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---
>  kernel/trace/trace_workqueue.c |   48 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 44 insertions(+), 4 deletions(-)
> 
> Index: b/kernel/trace/trace_workqueue.c
> ===================================================================
> --- a/kernel/trace/trace_workqueue.c	2009-04-13 13:23:59.000000000 +0900
> +++ b/kernel/trace/trace_workqueue.c	2009-04-13 13:59:51.000000000 +0900
> @@ -27,6 +27,10 @@ struct cpu_workqueue_stats {
>   *  on a single CPU.
>   */
>  	unsigned int		    executed;
> +
> +	/* store handler execution time */
> +	u64			    handler_start_time;
> +	u64			    max_executed_time;
>  };



Imho, this is the wrong place to instrument.
I explain my point of view below.



>  /* List of workqueue threads on one cpu */
> @@ -77,6 +81,7 @@ probe_workqueue_entry(struct task_struct
>  							list) {
>  		if (node->pid == wq_thread->pid) {
>  			node->executed++;
> +			node->handler_start_time = trace_clock_global();
>  			goto found;
>  		}
>  	}
> @@ -85,6 +90,29 @@ found:
>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>  
> +static void probe_workqueue_exit(struct task_struct *wq_thread,
> +				 struct work_struct *work)
> +{
> +	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> +	struct cpu_workqueue_stats *node, *next;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> +							list) {


Do you need the safe version here? You don't seem to remove any entry.

Sidenote: only the workqueue destruction handler might need it if I'm
not wrong.
I placed some of them in other places in this file because I misunderstood the
kernel list concepts in the past :)
(Heh, and probably still today).



> +		if (node->pid == wq_thread->pid) {
> +			u64 start = node->handler_start_time;
> +			u64 executed_time = trace_clock_global() - start;
> +
> +			if (node->max_executed_time < executed_time)
> +				node->max_executed_time = executed_time;
> +			goto found;
> +		}
> +	}
> +found:
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> +}
> +
>  /* Creation of a cpu workqueue thread */
>  static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
>  {
> @@ -195,6 +223,9 @@ static int workqueue_stat_show(struct se
>  	int cpu = cws->cpu;
>  	struct pid *pid;
>  	struct task_struct *tsk;
> +	unsigned long long exec_time = ns2usecs(cws->max_executed_time);
> +	unsigned long exec_usec_rem = do_div(exec_time, USEC_PER_SEC);
> +	unsigned long exec_secs = (unsigned long)exec_time;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
> @@ -205,8 +236,11 @@ static int workqueue_stat_show(struct se
>  	if (pid) {
>  		tsk = get_pid_task(pid, PIDTYPE_PID);
>  		if (tsk) {
> -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
> +			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
> +				   "  %s\n",
> +				   cws->cpu,
>  				   atomic_read(&cws->inserted), cws->executed,
> +				   exec_secs, exec_usec_rem,




You are measuring the latency from a workqueue thread point of view.
While I find the work latency measurement very interesting,
I think this patch does it in the wrong place. The _work_ latency point of view
seems to me a much richer source of information.

There are several reasons for that.

Indeed this patch is useful for workqueues that always receive the same work
to perform, so that you can find the guilty worklet very easily.
But the sense of this design is lost once we consider the workqueue threads
that receive random works. Of course the best example is events/%d.
One will observe the max latency that happened on events/0, as an example, but
he will only be able to feel a silent FUD because he has no way to find
which work caused this max latency.


Especially the events/%d latency measurement seems to me very important
because a single work from a random driver can propagate its latency
all over the system.

A single work that consumes too much cpu time, waits for long coming
events, sleeps too much, tries to take too often contended locks, or
whatever... such single work may delay all pending works in the queue
and the only max latency for a given workqueue is not helpful to find
these culprits.

Having this max latency snapshot per work and not per workqueue thread
would be useful for every kind of workqueue latency instrumentation:

- workqueues with single works
- workqueue with random works

A developer will also be able to measure its own worklet action and
find if it takes too much time, even if it isn't the worst worklet in
the workqueue to cause latencies.

The end result would be to have a descending latency sort of works
per cpu workqueue threads (or better: per workqueue group).

What do you think?

Frederic.



>  				   tsk->comm);
>  			put_task_struct(tsk);
>  		}
> @@ -218,8 +252,8 @@ static int workqueue_stat_show(struct se
>  
>  static int workqueue_stat_headers(struct seq_file *s)
>  {
> -	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
> -	seq_printf(s, "# |      |         |          |\n");
> +	seq_printf(s, "# CPU  INSERTED  EXECUTED     MAX_TIME  NAME\n");
> +	seq_printf(s, "# |      |         |           |         |\n");
>  	return 0;
>  }
>  
> @@ -259,10 +293,14 @@ int __init trace_workqueue_early_init(vo
>  	if (ret)
>  		goto no_insertion;
>  
> -	ret = register_trace_workqueue_creation(probe_workqueue_creation);
> +	ret = register_trace_workqueue_handler_exit(probe_workqueue_exit);
>  	if (ret)
>  		goto no_handler_entry;
>  
> +	ret = register_trace_workqueue_creation(probe_workqueue_creation);
> +	if (ret)
> +		goto no_handler_exit;
> +
>  	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
>  	if (ret)
>  		goto no_creation;
> @@ -276,6 +314,8 @@ int __init trace_workqueue_early_init(vo
>  
>  no_creation:
>  	unregister_trace_workqueue_creation(probe_workqueue_creation);
> +no_handler_exit:
> +	unregister_trace_workqueue_handler_exit(probe_workqueue_exit);
>  no_handler_entry:
>  	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
>  no_insertion:
> 
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-13  5:53             ` [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry KOSAKI Motohiro
@ 2009-04-13 16:25               ` Frederic Weisbecker
  2009-04-15 10:22                 ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-13 16:25 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel,
	Oleg Nesterov

On Mon, Apr 13, 2009 at 02:53:01PM +0900, KOSAKI Motohiro wrote:
> Subject: [PATCH] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
> 
> Entry/exit handler pair is useful common tracepoint technique. it can mesure handler consumption time.
> 
> Then, workqueue also handler-exit tracepoint and rename execution to handler-entry.
> 
> 
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: Zhaolei <zhaolei@cn.fujitsu.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Tom Zanussi <tzanussi@gmail.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---
>  include/trace/workqueue_event_types.h |   25 ++++++++++++++++++++++++-
>  kernel/trace/trace_workqueue.c        |   10 +++++-----
>  kernel/workqueue.c                    |    6 ++++--
>  3 files changed, 33 insertions(+), 8 deletions(-)
> 
> Index: b/include/trace/workqueue_event_types.h
> ===================================================================
> --- a/include/trace/workqueue_event_types.h	2009-04-13 13:10:27.000000000 +0900
> +++ b/include/trace/workqueue_event_types.h	2009-04-13 13:10:27.000000000 +0900
> @@ -32,7 +32,7 @@ TRACE_EVENT(workqueue_insertion,
>  		__entry->thread_pid, __entry->func)
>  );
>  
> -TRACE_EVENT(workqueue_execution,
> +TRACE_EVENT(workqueue_handler_entry,
>  
>  	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
>  
> @@ -56,6 +56,29 @@ TRACE_EVENT(workqueue_execution,
>  		__entry->thread_pid, __entry->func)
>  );
>  
> +TRACE_EVENT(workqueue_handler_exit,
> +	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
> +
> +	TP_ARGS(wq_thread, work),
> +
> +	TP_STRUCT__entry(
> +		__array(char,			thread_comm, TASK_COMM_LEN)
> +		__field(pid_t,			thread_pid)
> +		__field(struct work_struct *,	work)
> +		__field(work_func_t,		func)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> +		__entry->thread_pid	= wq_thread->pid;
> +		__entry->work		= work;
> +		__entry->func		= work->func;
> +	),
> +
> +	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> +		  __entry->thread_pid, __entry->func)
> +);
> +
>  /* Trace the creation of one workqueue thread on a cpu */
>  TRACE_EVENT(workqueue_creation,
>  
> Index: b/kernel/trace/trace_workqueue.c
> ===================================================================
> --- a/kernel/trace/trace_workqueue.c	2009-04-13 13:10:19.000000000 +0900
> +++ b/kernel/trace/trace_workqueue.c	2009-04-13 13:12:36.000000000 +0900
> @@ -65,7 +65,7 @@ found:
>  
>  /* Execution of a work */
>  static void
> -probe_workqueue_execution(struct task_struct *wq_thread,
> +probe_workqueue_entry(struct task_struct *wq_thread,
>  			  struct work_struct *work)
>  {
>  	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> @@ -255,13 +255,13 @@ int __init trace_workqueue_early_init(vo
>  	if (ret)
>  		goto out;
>  
> -	ret = register_trace_workqueue_execution(probe_workqueue_execution);
> +	ret = register_trace_workqueue_handler_entry(probe_workqueue_entry);
>  	if (ret)
>  		goto no_insertion;
>  
>  	ret = register_trace_workqueue_creation(probe_workqueue_creation);
>  	if (ret)
> -		goto no_execution;
> +		goto no_handler_entry;
>  
>  	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
>  	if (ret)
> @@ -276,8 +276,8 @@ int __init trace_workqueue_early_init(vo
>  
>  no_creation:
>  	unregister_trace_workqueue_creation(probe_workqueue_creation);
> -no_execution:
> -	unregister_trace_workqueue_execution(probe_workqueue_execution);
> +no_handler_entry:
> +	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
>  no_insertion:
>  	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
>  out:
> Index: b/kernel/workqueue.c
> ===================================================================
> --- a/kernel/workqueue.c	2009-04-13 13:06:36.000000000 +0900
> +++ b/kernel/workqueue.c	2009-04-13 13:14:15.000000000 +0900
> @@ -262,7 +262,8 @@ int queue_delayed_work_on(int cpu, struc
>  }
>  EXPORT_SYMBOL_GPL(queue_delayed_work_on);
>  
> -DEFINE_TRACE(workqueue_execution);
> +DEFINE_TRACE(workqueue_handler_entry);
> +DEFINE_TRACE(workqueue_handler_exit);
>  
>  static void run_workqueue(struct cpu_workqueue_struct *cwq)
>  {
> @@ -282,7 +283,6 @@ static void run_workqueue(struct cpu_wor
>  		 */
>  		struct lockdep_map lockdep_map = work->lockdep_map;
>  #endif
> -		trace_workqueue_execution(cwq->thread, work);
>  		cwq->current_work = work;
>  		list_del_init(cwq->worklist.next);
>  		spin_unlock_irq(&cwq->lock);
> @@ -291,7 +291,9 @@ static void run_workqueue(struct cpu_wor
>  		work_clear_pending(work);
>  		lock_map_acquire(&cwq->wq->lockdep_map);
>  		lock_map_acquire(&lockdep_map);
> +		trace_workqueue_handler_entry(cwq->thread, work);
>  		f(work);
> +		trace_workqueue_handler_exit(cwq->thread, work);
>  		lock_map_release(&lockdep_map);
>  		lock_map_release(&cwq->wq->lockdep_map);



Whatever we choose a per work or per workqueue latency measurement
(see my comment on the second patch) this patch makes sense.

Thanks!

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>

(Also adding Oleg in Cc).


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-13 16:16               ` Frederic Weisbecker
@ 2009-04-13 21:21                 ` Ingo Molnar
  2009-04-14  1:43                 ` KOSAKI Motohiro
  1 sibling, 0 replies; 94+ messages in thread
From: Ingo Molnar @ 2009-04-13 21:21 UTC (permalink / raw)
  To: Frederic Weisbecker, Oleg Nesterov, Andrew Morton
  Cc: KOSAKI Motohiro, Zhaolei, Steven Rostedt, Tom Zanussi, linux-kernel


(Oleg, Andrew: it's about workqueue tracing design.)

* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> >  		if (tsk) {
> > -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
> > +			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
> > +				   "  %s\n",
> > +				   cws->cpu,
> >  				   atomic_read(&cws->inserted), cws->executed,
> > +				   exec_secs, exec_usec_rem,
> 
> 
> You are measuring the latency from a workqueue thread point of 
> view. While I find the work latency measurement very interesting, 
> I think this patch does it in the wrong place. The _work_ latency 
> point of view seems to me much more rich as an information source.
> 
> There are several reasons for that.
> 
> Indeed this patch is useful for workqueues that receive always the 
> same work to perform so that you can find very easily the guilty 
> worklet. But the sense of this design is lost once we consider the 
> workqueue threads that receive random works. Of course the best 
> example is events/%d One will observe the max latency that 
> happened on event/0 as an exemple but he will only be able to feel 
> a silent FUD because he has no way to find which work caused this 
> max latency.

Expanding the trace view in a per worklet fashion is also useful for 
debugging: sometimes inefficiencies (or hangs) are related to the 
mixing of high-speed worklets with blocking worklets. This is not 
exposed if we stay at the workqueue level only.

> Especially the events/%d latency measurement seems to me very 
> important because a single work from a random driver can propagate 
> its latency all over the system.
> 
> A single work that consumes too much cpu time, waits for long 
> coming events, sleeps too much, tries to take too often contended 
> locks, or whatever... such single work may delay all pending works 
> in the queue and the only max latency for a given workqueue is not 
> helpful to find these culprits.
> 
> Having this max latency snapshot per work and not per workqueue 
> thread would be useful for every kind of workqueue latency 
> instrumentation:
> 
> - workqueues with single works
> - workqueue with random works
> 
> A developer will also be able to measure its own worklet action 
> and find if it takes too much time, even if it isn't the worst 
> worklet in the workqueue to cause latencies.
> 
> The end result would be to have a descending latency sort of works 
> per cpu workqueue threads (or better: per workqueue group).
> 
> What do you think?

Sounds like a good idea to me. It would also allow histograms based 
on worklet identity, etc. Often the most active kevents worklet 
should be considered to be split out as a new workqueue.

And if we have a per worklet tracepoint it would also allow a trace 
filter to only trace a given type of worklet.

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-13 16:16               ` Frederic Weisbecker
  2009-04-13 21:21                 ` Ingo Molnar
@ 2009-04-14  1:43                 ` KOSAKI Motohiro
  2009-04-14 11:40                   ` Frederic Weisbecker
  1 sibling, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-14  1:43 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton

Hi Frederic,

very thanks for good reviewing.


> > @@ -85,6 +90,29 @@ found:
> >  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> >  }
> >  
> > +static void probe_workqueue_exit(struct task_struct *wq_thread,
> > +				 struct work_struct *work)
> > +{
> > +	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> > +	struct cpu_workqueue_stats *node, *next;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> > +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> > +							list) {
> 
> 
> Do you need the safe version here? You don't seem to remove any entry.

Yes, this is just a stupid cut&paste mistake ;)
Will fix.

> Sidenote: only the workqueue destruction handler might need it if I'm
> not wrong.
> I placed some of them in other places in this file because I misunderstood the
> kernel list concepts in the past :)
> (Heh, and probably still today).
> 
> 
> 
> > +		if (node->pid == wq_thread->pid) {
> > +			u64 start = node->handler_start_time;
> > +			u64 executed_time = trace_clock_global() - start;
> > +
> > +			if (node->max_executed_time < executed_time)
> > +				node->max_executed_time = executed_time;
> > +			goto found;
> > +		}
> > +	}
> > +found:
> > +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> > +}
> > +
> >  /* Creation of a cpu workqueue thread */
> >  static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
> >  {
> > @@ -195,6 +223,9 @@ static int workqueue_stat_show(struct se
> >  	int cpu = cws->cpu;
> >  	struct pid *pid;
> >  	struct task_struct *tsk;
> > +	unsigned long long exec_time = ns2usecs(cws->max_executed_time);
> > +	unsigned long exec_usec_rem = do_div(exec_time, USEC_PER_SEC);
> > +	unsigned long exec_secs = (unsigned long)exec_time;
> >  
> >  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> >  	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
> > @@ -205,8 +236,11 @@ static int workqueue_stat_show(struct se
> >  	if (pid) {
> >  		tsk = get_pid_task(pid, PIDTYPE_PID);
> >  		if (tsk) {
> > -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
> > +			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
> > +				   "  %s\n",
> > +				   cws->cpu,
> >  				   atomic_read(&cws->inserted), cws->executed,
> > +				   exec_secs, exec_usec_rem,
> 
> You are measuring the latency from a workqueue thread point of view.
> While I find the work latency measurement very interesting,
> I think this patch does it in the wrong place. The _work_ latency point of view
> seems to me much more rich as an information source.
> 
> There are several reasons for that.
> 
> Indeed this patch is useful for workqueues that receive always the same work
> to perform so that you can find very easily the guilty worklet.
> But the sense of this design is lost once we consider the workqueue threads
> that receive random works. Of course the best example is events/%d
> One will observe the max latency that happened on event/0 as an exemple but
> he will only be able to feel a silent FUD because he has no way to find
> which work caused this max latency.

May I explain my expected usage scenario?

Firstly, the developer checks these statistics and notices a strange result.
Secondly, the developer monitors workqueue activity with the event tracer.
(It provides per-work activity; maybe the event filter feature is useful.)

Yes, I have to agree my last patch description is too poor,
but I think my expected usage scenario isn't so insane.

Next, I would like to explain why I didn't choose to add per-work statistics.
A struct work can be put on the stack and is a short-lived object;
therefore, it isn't a proper "statistics" target.

I like my approach or histogram approach (suggested by ingo).


May I ask your feeling to my usage scenario?

> 
> 
> Especially the events/%d latency measurement seems to me very important
> because a single work from a random driver can propagate its latency
> all over the system.
> 
> A single work that consumes too much cpu time, waits for long coming
> events, sleeps too much, tries to take too often contended locks, or
> whatever... such single work may delay all pending works in the queue
> and the only max latency for a given workqueue is not helpful to find
> these culprits.
> 
> Having this max latency snapshot per work and not per workqueue thread
> would be useful for every kind of workqueue latency instrumentation:
> 
> - workqueues with single works
> - workqueue with random works
> 
> A developer will also be able to measure its own worklet action and
> find if it takes too much time, even if it isn't the worst worklet in
> the workqueue to cause latencies.
> 
> The end result would be to have a descending latency sort of works
> per cpu workqueue threads (or better: per workqueue group).
> 
> What do you think?






^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 4/4] ftrace: add latecy mesurement feature to workqueue tracer
  2009-04-13  5:55             ` [PATCH v2 4/4] ftrace: add latecy mesurement feature " KOSAKI Motohiro
@ 2009-04-14  2:50               ` KOSAKI Motohiro
  0 siblings, 0 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-14  2:50 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Frederic Weisbecker,
	Tom Zanussi, Ingo Molnar, linux-kernel

> Index: b/include/trace/workqueue_event_types.h
> ===================================================================
> --- a/include/trace/workqueue_event_types.h	2009-04-13 13:58:21.000000000 +0900
> +++ b/include/trace/workqueue_event_types.h	2009-04-13 14:07:14.000000000 +0900
> @@ -43,6 +43,8 @@ TRACE_EVENT(workqueue_handler_entry,
>  		__field(pid_t,			thread_pid)
>  		__field(struct work_struct *,	work)
>  		__field(work_func_t,		func)
> +		__field(unsigned long,		expected_time)
> +		__field(unsigned long,		actual_time)
>  	),
>  
>  	TP_fast_assign(
> @@ -50,10 +52,14 @@ TRACE_EVENT(workqueue_handler_entry,
>  		__entry->thread_pid	= wq_thread->pid;
>  		__entry->work		= work;
>  		__entry->func		= work->func;
> +		__entry->expected_time	= work->expected_time;
> +		__entry->actual_time	= jiffies;
>  	),

Oops, that's my fault.
work->expected_time only exists if CONFIG_WORKQUEUE_TRACER is on.


>  
> -	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> -		__entry->thread_pid, __entry->func)
> +	TP_printk("thread=%s:%d latency=%lums func=%pF", __entry->thread_comm,
> +		  __entry->thread_pid,
> +		  jiffies_to_msecs(__entry->actual_time-__entry->expected_time),
> +		  __entry->func)
>  );




^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
                               ` (3 preceding siblings ...)
  2009-04-13 15:24             ` [PATCH v2 1/4] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
@ 2009-04-14  4:03             ` KOSAKI Motohiro
  2009-04-14 21:16               ` Frederic Weisbecker
  4 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-14  4:03 UTC (permalink / raw)
  To: Zhaolei, Steven Rostedt, Frederic Weisbecker, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton
  Cc: kosaki.motohiro


Frederic, What do you think this patch?
===============

Currently, the event tracer only displays the function name, which is useful enough,
but some drivers share a work function between multiple work_structs.
In that case, displaying the work name as well is better.

example output
------------------------------
# tracer: nop
#
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
#              | |       |          |         |
          <idle>-0     [003]  1540.844960: workqueue_insertion: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_task
          <idle>-0     [003]  1540.844964: workqueue_insertion: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
           <...>-30    [003]  1540.844969: workqueue_handler_entry: thread=events/3:30 latency=0ms func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter-
>watchdog_task
           <...>-30    [003]  1540.845003: workqueue_handler_exit: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_tas
k
           <...>-30    [003]  1540.845004: workqueue_handler_entry: thread=events/3:30 latency=0ms func=vmstat_update+0x0/0x3f work=vmstat_work
           <...>-30    [003]  1540.845007: workqueue_handler_exit: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work

This patch increases kernel size because of the work name string table.
But lockdep already has a work name string table, so if you already use lockdep,
this patch does not increase the kernel size.


Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/workqueue.h             |   34 +++++++++++++++++++++++++++++++++-
 include/trace/workqueue_event_types.h |   23 ++++++++++++++++-------
 kernel/workqueue.c                    |    2 +-
 3 files changed, 50 insertions(+), 9 deletions(-)

Index: b/include/linux/workqueue.h
===================================================================
--- a/include/linux/workqueue.h	2009-04-14 11:57:05.000000000 +0900
+++ b/include/linux/workqueue.h	2009-04-14 12:53:57.000000000 +0900
@@ -22,6 +22,11 @@ typedef void (*work_func_t)(struct work_
  */
 #define work_data_bits(work) ((unsigned long *)(&(work)->data))
 
+#if defined(CONFIG_WORKQUEUE_TRACER) || defined(CONFIG_EVENT_TRACING)
+#define HAVE_WORK_TRACE 1
+#endif
+
+
 struct work_struct {
 	atomic_long_t data;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
@@ -32,8 +37,9 @@ struct work_struct {
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
-#ifdef CONFIG_WORKQUEUE_TRACER
+#ifdef HAVE_WORK_TRACE
 	unsigned long expected_time;
+	char *name;
 #endif
 };
 
@@ -65,11 +71,19 @@ struct execute_work {
 #define __WORK_INIT_LOCKDEP_MAP(n, k)
 #endif
 
+#ifdef HAVE_WORK_TRACE
+#define __WORK_INIT_NAME(_work, _name)				\
+	.name = _name,
+#else
+#define __WORK_INIT_NAME(_work, _name)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
+	__WORK_INIT_NAME(&n, #n)				\
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
@@ -94,6 +108,18 @@ struct execute_work {
 #define PREPARE_DELAYED_WORK(_work, _func)			\
 	PREPARE_WORK(&(_work)->work, (_func))
 
+#ifdef HAVE_WORK_TRACE
+#define SET_WORK_NAME(_work, _name)				\
+	do {							\
+		(_work)->name = (_name);			\
+	} while (0)
+
+#else /*!HAVE_WORK_TRACE*/
+#define SET_WORK_NAME(_work, name)
+#endif /*!HAVE_WORK_TRACE*/
+
+
+
 /*
  * initialize all of a work item in one go
  *
@@ -110,6 +136,7 @@ struct execute_work {
 		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
+		SET_WORK_NAME((_work), #_work);				\
 	} while (0)
 #else
 #define INIT_WORK(_work, _func)						\
@@ -117,6 +144,7 @@ struct execute_work {
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
+		SET_WORK_NAME((_work), #_work);				\
 	} while (0)
 #endif
 
@@ -124,24 +152,28 @@ struct execute_work {
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer(&(_work)->timer);			\
+		SET_WORK_NAME(&(_work)->work, #_work);		\
 	} while (0)
 
 #define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer_on_stack(&(_work)->timer);		\
+		SET_WORK_NAME(&(_work)->work, #_work);		\
 	} while (0)
 
 #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)			\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer_deferrable(&(_work)->timer);		\
+		SET_WORK_NAME(&(_work)->work, #_work);		\
 	} while (0)
 
 #define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
 	do {							\
 		INIT_WORK(&(_work)->work, (_func));		\
 		init_timer_on_stack(&(_work)->timer);		\
+		SET_WORK_NAME(&(_work)->work, #_work);		\
 	} while (0)
 
 /**
Index: b/include/trace/workqueue_event_types.h
===================================================================
--- a/include/trace/workqueue_event_types.h	2009-04-14 11:57:05.000000000 +0900
+++ b/include/trace/workqueue_event_types.h	2009-04-14 12:15:26.000000000 +0900
@@ -19,6 +19,7 @@ TRACE_EVENT(workqueue_insertion,
 		__field(pid_t,			thread_pid)
 		__field(struct work_struct *,	work)
 		__field(work_func_t,		func)
+		__field(char*,			workname)
 	),
 
 	TP_fast_assign(
@@ -26,10 +27,12 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->thread_pid	= wq_thread->pid;
 		__entry->work		= work;
 		__entry->func		= work->func;
+		__entry->workname	= work->name;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_printk("thread=%s:%d func=%pF work=%s",
+		  __entry->thread_comm, __entry->thread_pid,
+		  __entry->func, __entry->workname)
 );
 
 TRACE_EVENT(workqueue_handler_entry,
@@ -45,6 +48,7 @@ TRACE_EVENT(workqueue_handler_entry,
 		__field(work_func_t,		func)
 		__field(unsigned long,		expected_time)
 		__field(unsigned long,		actual_time)
+		__field(char*,			workname)
 	),
 
 	TP_fast_assign(
@@ -54,12 +58,14 @@ TRACE_EVENT(workqueue_handler_entry,
 		__entry->func		= work->func;
 		__entry->expected_time	= work->expected_time;
 		__entry->actual_time	= jiffies;
+		__entry->workname	= work->name;
 	),
 
-	TP_printk("thread=%s:%d latency=%lums func=%pF", __entry->thread_comm,
-		  __entry->thread_pid,
+	TP_printk("thread=%s:%d latency=%lums func=%pF work=%s",
+		  __entry->thread_comm, __entry->thread_pid,
 		  jiffies_to_msecs(__entry->actual_time-__entry->expected_time),
-		  __entry->func)
+		  __entry->func,
+		  __entry->workname)
 );
 
 TRACE_EVENT(workqueue_handler_exit,
@@ -72,6 +78,7 @@ TRACE_EVENT(workqueue_handler_exit,
 		__field(pid_t,			thread_pid)
 		__field(struct work_struct *,	work)
 		__field(work_func_t,		func)
+		__field(char*,			workname)
 	),
 
 	TP_fast_assign(
@@ -79,10 +86,12 @@ TRACE_EVENT(workqueue_handler_exit,
 		__entry->thread_pid	= wq_thread->pid;
 		__entry->work		= work;
 		__entry->func		= work->func;
+		__entry->workname	= work->name;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
-		  __entry->thread_pid, __entry->func)
+	TP_printk("thread=%s:%d func=%pF work=%s",
+		  __entry->thread_comm, __entry->thread_pid,
+		  __entry->func, __entry->workname)
 );
 
 /* Trace the creation of one workqueue thread on a cpu */
Index: b/kernel/workqueue.c
===================================================================
--- a/kernel/workqueue.c	2009-04-14 11:57:05.000000000 +0900
+++ b/kernel/workqueue.c	2009-04-14 11:59:52.000000000 +0900
@@ -82,7 +82,7 @@ static const struct cpumask *cpu_singlet
  */
 static cpumask_var_t cpu_populated_map __read_mostly;
 
-#ifdef CONFIG_WORKQUEUE_TRACER
+#if HAVE_WORK_TRACE
 static void set_expected_time(struct work_struct *work,
 			      unsigned long expected_jiffies)
 {



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-14  1:43                 ` KOSAKI Motohiro
@ 2009-04-14 11:40                   ` Frederic Weisbecker
  2009-04-15  0:31                     ` KOSAKI Motohiro
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-14 11:40 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel,
	Oleg Nesterov, Andrew Morton

On Tue, Apr 14, 2009 at 10:43:21AM +0900, KOSAKI Motohiro wrote:
> Hi Frederic,
> 
> many thanks for the good review.
> 
> 
> > > @@ -85,6 +90,29 @@ found:
> > >  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> > >  }
> > >  
> > > +static void probe_workqueue_exit(struct task_struct *wq_thread,
> > > +				 struct work_struct *work)
> > > +{
> > > +	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> > > +	struct cpu_workqueue_stats *node, *next;
> > > +	unsigned long flags;
> > > +
> > > +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> > > +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> > > +							list) {
> > 
> > 
> > Do you need the safe version here? You don't seem to remove any entry.
> 
> Yes, this is just stupid cut&paste mistake ;)
> Will fix.
> 
> > Sidenote: only the workqueue destruction handler might need it if I'm
> > not wrong.
> > I placed some of them in other places in this file because I misunderstood the
> > kernel list concepts in the past :)
> > (Heh, and probably still today).
> > 
> > 
> > 
> > > +		if (node->pid == wq_thread->pid) {
> > > +			u64 start = node->handler_start_time;
> > > +			u64 executed_time = trace_clock_global() - start;
> > > +
> > > +			if (node->max_executed_time < executed_time)
> > > +				node->max_executed_time = executed_time;
> > > +			goto found;
> > > +		}
> > > +	}
> > > +found:
> > > +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> > > +}
> > > +
> > >  /* Creation of a cpu workqueue thread */
> > >  static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
> > >  {
> > > @@ -195,6 +223,9 @@ static int workqueue_stat_show(struct se
> > >  	int cpu = cws->cpu;
> > >  	struct pid *pid;
> > >  	struct task_struct *tsk;
> > > +	unsigned long long exec_time = ns2usecs(cws->max_executed_time);
> > > +	unsigned long exec_usec_rem = do_div(exec_time, USEC_PER_SEC);
> > > +	unsigned long exec_secs = (unsigned long)exec_time;
> > >  
> > >  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> > >  	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
> > > @@ -205,8 +236,11 @@ static int workqueue_stat_show(struct se
> > >  	if (pid) {
> > >  		tsk = get_pid_task(pid, PIDTYPE_PID);
> > >  		if (tsk) {
> > > -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
> > > +			seq_printf(s, "%3d %6d     %6u     %5lu.%06lu"
> > > +				   "  %s\n",
> > > +				   cws->cpu,
> > >  				   atomic_read(&cws->inserted), cws->executed,
> > > +				   exec_secs, exec_usec_rem,
> > 
> > You are measuring the latency from a workqueue thread point of view.
> > While I find the work latency measurement very interesting,
> > I think this patch does it in the wrong place. The _work_ latency point of view
> > seems to me much more rich as an information source.
> > 
> > There are several reasons for that.
> > 
> > Indeed this patch is useful for workqueues that receive always the same work
> > to perform so that you can find very easily the guilty worklet.
> > But the sense of this design is lost once we consider the workqueue threads
> > that receive random works. Of course the best example is events/%d
> > One will observe the max latency that happened on event/0 as an exemple but
> > he will only be able to feel a silent FUD because he has no way to find
> > which work caused this max latency.
> 
> May I explain my expected usage scenario?
> 
> firstly, the developer checks these statistics and notices a strange result. secondly,
> the developer monitors workqueue activity with the event tracer.
> (it provides per-work activity; maybe the event filter feature is useful)
> 
> Yes, I have to agree my last patch description is too poor,
> but I think my expected scenario isn't so insane.
> 
> Next, I hope to explain why I didn't choose to add per-work statistics.
> struct work can be put on the stack and is a short-lived object;
> therefore, it isn't a proper "statistics" target.
> 
> I like my approach or histogram approach (suggested by ingo).
> 
> 
> May I ask your feeling to my usage scenario?



Ok, I understand. This is a coupling of statistical tracing
and batch raw event tracing.
But a statistical view for every work per workqueue would
be definitely more helpful.
Being forced to look at the raw batch of work events involves
more searching in the traces and more headaches.

With your patch, we only see the worst time case on a workqueue while
it would be better to find all the works which are encumbering
a workqueue, sorted by latency importance.

I agree with the fact that it's not so easy though, because the works
can be allocated on the stack as you said.

Thanks,
Frederic.


> 
> > 
> > 
> > Especially the events/%d latency measurement seems to me very important
> > because a single work from a random driver can propagate its latency
> > all over the system.
> > 
> > A single work that consumes too much cpu time, waits for long coming
> > events, sleeps too much, tries to take too often contended locks, or
> > whatever... such single work may delay all pending works in the queue
> > and the only max latency for a given workqueue is not helpful to find
> > these culprits.
> > 
> > Having this max latency snapshot per work and not per workqueue thread
> > would be useful for every kind of workqueue latency instrumentation:
> > 
> > - workqueues with single works
> > - workqueue with random works
> > 
> > A developer will also be able to measure its own worklet action and
> > find if it takes too much time, even if it isn't the worst worklet in
> > the workqueue to cause latencies.
> > 
> > The end result would be to have a descending latency sort of works
> > per cpu workqueue threads (or better: per workqueue group).
> > 
> > What do you think?
> 
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-14  4:03             ` [PATCH v2 5/4] ftrace, workqueuetrace: display work name KOSAKI Motohiro
@ 2009-04-14 21:16               ` Frederic Weisbecker
  2009-04-14 23:55                 ` KOSAKI Motohiro
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-14 21:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel,
	Oleg Nesterov, Andrew Morton

On Tue, Apr 14, 2009 at 01:03:35PM +0900, KOSAKI Motohiro wrote:
> 
> Frederic, What do you think this patch?
> ===============
> 
> Currently, event-tracer only display function name. it is enough useful.
> but some driver share work-function between multiple work_struct.
> then, work name displaying is better.
> 
> example output
> ------------------------------
> # tracer: nop
> #
> #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
> #              | |       |          |         |
>           <idle>-0     [003]  1540.844960: workqueue_insertion: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_task
>           <idle>-0     [003]  1540.844964: workqueue_insertion: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
>            <...>-30    [003]  1540.844969: workqueue_handler_entry: thread=events/3:30 latency=0ms func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter-
> >watchdog_task
>            <...>-30    [003]  1540.845003: workqueue_handler_exit: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_tas
> k
>            <...>-30    [003]  1540.845004: workqueue_handler_entry: thread=events/3:30 latency=0ms func=vmstat_update+0x0/0x3f work=vmstat_work
>            <...>-30    [003]  1540.845007: workqueue_handler_exit: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
> 
> This patch increase kernel-size for work name string table.
> But actually, lockdep already have work name string table. then if you already use lockdep,
> you don't get kernel-size increasing by this patch.
> 
> 
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Cc: Zhaolei <zhaolei@cn.fujitsu.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Tom Zanussi <tzanussi@gmail.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---


I don't know. Does it happen that frequently for a function to be used
by several works?
If so, this feature may be indeed interesting.

Thanks,
Frederic Weisbecker.



>  include/linux/workqueue.h             |   34 +++++++++++++++++++++++++++++++++-
>  include/trace/workqueue_event_types.h |   23 ++++++++++++++++-------
>  kernel/workqueue.c                    |    2 +-
>  3 files changed, 50 insertions(+), 9 deletions(-)
> 
> Index: b/include/linux/workqueue.h
> ===================================================================
> --- a/include/linux/workqueue.h	2009-04-14 11:57:05.000000000 +0900
> +++ b/include/linux/workqueue.h	2009-04-14 12:53:57.000000000 +0900
> @@ -22,6 +22,11 @@ typedef void (*work_func_t)(struct work_
>   */
>  #define work_data_bits(work) ((unsigned long *)(&(work)->data))
>  
> +#if defined(CONFIG_WORKQUEUE_TRACER) || defined(CONFIG_EVENT_TRACING)
> +#define HAVE_WORK_TRACE 1
> +#endif
> +
> +
>  struct work_struct {
>  	atomic_long_t data;
>  #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
> @@ -32,8 +37,9 @@ struct work_struct {
>  #ifdef CONFIG_LOCKDEP
>  	struct lockdep_map lockdep_map;
>  #endif
> -#ifdef CONFIG_WORKQUEUE_TRACER
> +#ifdef HAVE_WORK_TRACE
>  	unsigned long expected_time;
> +	char *name;
>  #endif
>  };
>  
> @@ -65,11 +71,19 @@ struct execute_work {
>  #define __WORK_INIT_LOCKDEP_MAP(n, k)
>  #endif
>  
> +#ifdef HAVE_WORK_TRACE
> +#define __WORK_INIT_NAME(_work, _name)				\
> +	.name = _name,
> +#else
> +#define __WORK_INIT_NAME(_work, _name)
> +#endif
> +
>  #define __WORK_INITIALIZER(n, f) {				\
>  	.data = WORK_DATA_INIT(),				\
>  	.entry	= { &(n).entry, &(n).entry },			\
>  	.func = (f),						\
>  	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
> +	__WORK_INIT_NAME(&n, #n)				\
>  	}
>  
>  #define __DELAYED_WORK_INITIALIZER(n, f) {			\
> @@ -94,6 +108,18 @@ struct execute_work {
>  #define PREPARE_DELAYED_WORK(_work, _func)			\
>  	PREPARE_WORK(&(_work)->work, (_func))
>  
> +#ifdef HAVE_WORK_TRACE
> +#define SET_WORK_NAME(_work, _name)				\
> +	do {							\
> +		(_work)->name = (_name);			\
> +	} while (0)
> +
> +#else /*!HAVE_WORK_TRACE*/
> +#define SET_WORK_NAME(_work, name)
> +#endif /*!HAVE_WORK_TRACE*/
> +
> +
> +
>  /*
>   * initialize all of a work item in one go
>   *
> @@ -110,6 +136,7 @@ struct execute_work {
>  		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
>  		INIT_LIST_HEAD(&(_work)->entry);			\
>  		PREPARE_WORK((_work), (_func));				\
> +		SET_WORK_NAME((_work), #_work);				\
>  	} while (0)
>  #else
>  #define INIT_WORK(_work, _func)						\
> @@ -117,6 +144,7 @@ struct execute_work {
>  		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
>  		INIT_LIST_HEAD(&(_work)->entry);			\
>  		PREPARE_WORK((_work), (_func));				\
> +		SET_WORK_NAME((_work), #_work);				\
>  	} while (0)
>  #endif
>  
> @@ -124,24 +152,28 @@ struct execute_work {
>  	do {							\
>  		INIT_WORK(&(_work)->work, (_func));		\
>  		init_timer(&(_work)->timer);			\
> +		SET_WORK_NAME(&(_work)->work, #_work);		\
>  	} while (0)
>  
>  #define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
>  	do {							\
>  		INIT_WORK(&(_work)->work, (_func));		\
>  		init_timer_on_stack(&(_work)->timer);		\
> +		SET_WORK_NAME(&(_work)->work, #_work);		\
>  	} while (0)
>  
>  #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func)			\
>  	do {							\
>  		INIT_WORK(&(_work)->work, (_func));		\
>  		init_timer_deferrable(&(_work)->timer);		\
> +		SET_WORK_NAME(&(_work)->work, #_work);		\
>  	} while (0)
>  
>  #define INIT_DELAYED_WORK_ON_STACK(_work, _func)		\
>  	do {							\
>  		INIT_WORK(&(_work)->work, (_func));		\
>  		init_timer_on_stack(&(_work)->timer);		\
> +		SET_WORK_NAME(&(_work)->work, #_work);		\
>  	} while (0)
>  
>  /**
> Index: b/include/trace/workqueue_event_types.h
> ===================================================================
> --- a/include/trace/workqueue_event_types.h	2009-04-14 11:57:05.000000000 +0900
> +++ b/include/trace/workqueue_event_types.h	2009-04-14 12:15:26.000000000 +0900
> @@ -19,6 +19,7 @@ TRACE_EVENT(workqueue_insertion,
>  		__field(pid_t,			thread_pid)
>  		__field(struct work_struct *,	work)
>  		__field(work_func_t,		func)
> +		__field(char*,			workname)
>  	),
>  
>  	TP_fast_assign(
> @@ -26,10 +27,12 @@ TRACE_EVENT(workqueue_insertion,
>  		__entry->thread_pid	= wq_thread->pid;
>  		__entry->work		= work;
>  		__entry->func		= work->func;
> +		__entry->workname	= work->name;
>  	),
>  
> -	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> -		__entry->thread_pid, __entry->func)
> +	TP_printk("thread=%s:%d func=%pF work=%s",
> +		  __entry->thread_comm, __entry->thread_pid,
> +		  __entry->func, __entry->workname)
>  );
>  
>  TRACE_EVENT(workqueue_handler_entry,
> @@ -45,6 +48,7 @@ TRACE_EVENT(workqueue_handler_entry,
>  		__field(work_func_t,		func)
>  		__field(unsigned long,		expected_time)
>  		__field(unsigned long,		actual_time)
> +		__field(char*,			workname)
>  	),
>  
>  	TP_fast_assign(
> @@ -54,12 +58,14 @@ TRACE_EVENT(workqueue_handler_entry,
>  		__entry->func		= work->func;
>  		__entry->expected_time	= work->expected_time;
>  		__entry->actual_time	= jiffies;
> +		__entry->workname	= work->name;
>  	),
>  
> -	TP_printk("thread=%s:%d latency=%lums func=%pF", __entry->thread_comm,
> -		  __entry->thread_pid,
> +	TP_printk("thread=%s:%d latency=%lums func=%pF work=%s",
> +		  __entry->thread_comm, __entry->thread_pid,
>  		  jiffies_to_msecs(__entry->actual_time-__entry->expected_time),
> -		  __entry->func)
> +		  __entry->func,
> +		  __entry->workname)
>  );
>  
>  TRACE_EVENT(workqueue_handler_exit,
> @@ -72,6 +78,7 @@ TRACE_EVENT(workqueue_handler_exit,
>  		__field(pid_t,			thread_pid)
>  		__field(struct work_struct *,	work)
>  		__field(work_func_t,		func)
> +		__field(char*,			workname)
>  	),
>  
>  	TP_fast_assign(
> @@ -79,10 +86,12 @@ TRACE_EVENT(workqueue_handler_exit,
>  		__entry->thread_pid	= wq_thread->pid;
>  		__entry->work		= work;
>  		__entry->func		= work->func;
> +		__entry->workname	= work->name;
>  	),
>  
> -	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
> -		  __entry->thread_pid, __entry->func)
> +	TP_printk("thread=%s:%d func=%pF work=%s",
> +		  __entry->thread_comm, __entry->thread_pid,
> +		  __entry->func, __entry->workname)
>  );
>  
>  /* Trace the creation of one workqueue thread on a cpu */
> Index: b/kernel/workqueue.c
> ===================================================================
> --- a/kernel/workqueue.c	2009-04-14 11:57:05.000000000 +0900
> +++ b/kernel/workqueue.c	2009-04-14 11:59:52.000000000 +0900
> @@ -82,7 +82,7 @@ static const struct cpumask *cpu_singlet
>   */
>  static cpumask_var_t cpu_populated_map __read_mostly;
>  
> -#ifdef CONFIG_WORKQUEUE_TRACER
> +#if HAVE_WORK_TRACE
>  static void set_expected_time(struct work_struct *work,
>  			      unsigned long expected_jiffies)
>  {
> 
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-14 21:16               ` Frederic Weisbecker
@ 2009-04-14 23:55                 ` KOSAKI Motohiro
  2009-04-15  1:15                   ` Frederic Weisbecker
  0 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-14 23:55 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton

> > ------------------------------
> > # tracer: nop
> > #
> > #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
> > #              | |       |          |         |
> >           <idle>-0     [003]  1540.844960: workqueue_insertion: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_task
> >           <idle>-0     [003]  1540.844964: workqueue_insertion: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
> >            <...>-30    [003]  1540.844969: workqueue_handler_entry: thread=events/3:30 latency=0ms func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter-
> > >watchdog_task
> >            <...>-30    [003]  1540.845003: workqueue_handler_exit: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_tas
> > k
> >            <...>-30    [003]  1540.845004: workqueue_handler_entry: thread=events/3:30 latency=0ms func=vmstat_update+0x0/0x3f work=vmstat_work
> >            <...>-30    [003]  1540.845007: workqueue_handler_exit: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
> > 
> > This patch increase kernel-size for work name string table.
> > But actually, lockdep already have work name string table. then if you already use lockdep,
> > you don't get kernel-size increasing by this patch.
> 
> I don't know. Does it happen that frequently for a function to be used
> by several works?
> If so, this feature may be indeed interesting.

No, it's really rare.
In the last discussion, "per work" tracing was one of the key words,
so I added this.

but, if frequency is important, I can drop it.




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer
  2009-04-14 11:40                   ` Frederic Weisbecker
@ 2009-04-15  0:31                     ` KOSAKI Motohiro
  0 siblings, 0 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-15  0:31 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton

> > May I explain my expected usage scenario?
> > 
> > firstly, the developer check this stastics and nortice strage result. secondly
> > the developer monitor workqueue activety by event-tracer.
> > (it provide per work activety, maybe event filter feature is useful)
> > 
> > Yes, I have to agree my last patch description is too poor.
> > but I think my expected scenario is't so insane.
> > 
> > Next, I hope to explain why I don't choice adding per work stastics.
> > struct work can put in stack and it's short lived object.
> > then, it isn't proper "stastics" target.
> > 
> > I like my approach or histogram approach (suggested by ingo).
> > 
> > May I ask your feeling to my usage scenario?
> 
> Ok, I understand. This is a coupling of statistical tracing
> and batch raw event tracing.
> But a statistical view for every work per workqueue would
> be definetly more helpful.
> Beeing forced to look at the raw batch of work events involves
> more searching in the traces and more headaches.
> 
> With your patch, we only see the worst time case on a workqueue while
> it would be better to find all the works which are encumbering
> a workqueue, sorted by latency importance.
> 
> I agree with the fact that it's not so easy though, because the works
> can be allocated on the stack as you said.

ok.
Now, we agreed my patch works well enough and your proposal is better, right?
If so, we can discuss per-work statistics and per-workqueue statistics separately.

I will submit per-workqueue statistics v3 later today (or tomorrow).




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-14 23:55                 ` KOSAKI Motohiro
@ 2009-04-15  1:15                   ` Frederic Weisbecker
  2009-04-15  6:13                     ` KOSAKI Motohiro
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-15  1:15 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel,
	Oleg Nesterov, Andrew Morton

On Wed, Apr 15, 2009 at 08:55:31AM +0900, KOSAKI Motohiro wrote:
> > > ------------------------------
> > > # tracer: nop
> > > #
> > > #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
> > > #              | |       |          |         |
> > >           <idle>-0     [003]  1540.844960: workqueue_insertion: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_task
> > >           <idle>-0     [003]  1540.844964: workqueue_insertion: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
> > >            <...>-30    [003]  1540.844969: workqueue_handler_entry: thread=events/3:30 latency=0ms func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter-
> > > >watchdog_task
> > >            <...>-30    [003]  1540.845003: workqueue_handler_exit: thread=events/3:30 func=e1000_watchdog_task+0x0/0x6ae [e1000e] work=&adapter->watchdog_tas
> > > k
> > >            <...>-30    [003]  1540.845004: workqueue_handler_entry: thread=events/3:30 latency=0ms func=vmstat_update+0x0/0x3f work=vmstat_work
> > >            <...>-30    [003]  1540.845007: workqueue_handler_exit: thread=events/3:30 func=vmstat_update+0x0/0x3f work=vmstat_work
> > > 
> > > This patch increase kernel-size for work name string table.
> > > But actually, lockdep already have work name string table. then if you already use lockdep,
> > > you don't get kernel-size increasing by this patch.
> > 
> > I don't know. Does it happen that frequently for a function to be used
> > by several works?
> > If so, this feature may be indeed interesting.
> 
> No, it's really rare.
> in last discuttion, "per work" tracing is one of the key-word.
> then, I add this.
> 
> but, if frequency is important, I can drop it.


Kosaki-san,

Perhaps you misunderstood me, which is easy because my english is
evil ;-)

We have to distinguish event tracing and statistical/histogram tracing
here.

Event tracing is about logging the events when they come and store
them one by one to output them later. That's what does TRACE_EVENT
for instance.

Statistical tracing doesn't store a trace of each event but instead
update some numbers after each event: number of events, maximum
latency, average, etc...

About event tracing, we want to have something that lets us identify
the events individually. For the works it can be either the function
embedded in the work, or the work itself.
But do we need both? Since it's rare for a function to be embedded in
more than two works, I guess we only need one of those pieces of information.
Which one is the more efficient to identify a work? That can be discussed
actually.

When I talked about per-work tracing, it was in a generic way. What do we
use to identify each work individually: either the function or the work
name? Both seems accurate for that actually, the fact is that both can
be used for per-work tracing.

Actually my previous mails were focused on statistical tracing.

You proposed something that would result in the following final view:

	workqueue_name:pid	n_inserted n_executed cpu max_latency

And then by looking at the trace file, we can retrieve the work/function
that produced this max latency.

While I proposed this another idea:

	workqueue_name:pid	n_inserted n_executed cpu

	work1	latency_avg	latency_max
	work2	latency_avg	latency_max
	work3	latency_avg	latency_max
	.....

(We can have it with one file per workqueue).
work1 can be either the work name or the function executed though
the function is probably the main focus here because it's the
real source culprit.
But we can also output work_name:func

You see? With such output we see immediately which works are creating the
worst latencies.
And the event tracing is still very helpful here to have a more
fine grained tracing and see the behaviour of some works more precisely.

That's a kind of tracing process we can imagine:

- we start by looking at the statistics and identify the wicked
  works/funcs.
- we look at the events on /debug/tracing/trace and, coupling with
  some well-chosen filters, we observe the behaviour of a work with
  more precision.


But I'm not opposite to your patch, I think it can be helpful to also
have the work name on certain circumstances.
But it makes the whole line a bit messy with a lot of informations for
those who only need the func name (or only the work name).
The best would be to have a runtime option to choose whether we want
to display it or not.

Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-15  1:15                   ` Frederic Weisbecker
@ 2009-04-15  6:13                     ` KOSAKI Motohiro
  2009-04-15  6:17                       ` Zhaolei
                                         ` (4 more replies)
  0 siblings, 5 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-15  6:13 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton

Hi

> Kosaki-san,
> 
> Perhaps you misunderstood me, which is easy because my english is
> evil ;-)

hehe, my english is much poorer than yours ;)


> We have to distinguish event tracing and statistical/histogram tracing
> here.
> 
> Event tracing is about logging the events when they come and store
> them one by one to output them later. That's what does TRACE_EVENT
> for instance.
> 
> Statistical tracing doesn't store a trace of each event but instead
> update some numbers after each event: number of events, maximum
> latency, average, etc...

Agreed.


> About event tracing, we want to have something that let us identifying
> the events individually. For the works it can be either the function
> embedeed in the work, or the work itself.
> But do we need both? Since it's rare that a function can be embedeed in
> more than two works, I guess we only need one of those informations.
> Which one is the more efficient to identify a work? That can be discussed
> actually.

OK. I think the function name is enough. I'll drop this patch.

Also, the function name has another benefit:
symbol names are module-unload safe, so we don't need to care
about module unloading.

On the other hand, a work_struct variable is often a static variable,
which means the variable name is often very short.


> When I talked about per-work tracing, it was in a generic way. What do we
> use to identify each work individually: either the function or the work
> name? Both seems accurate for that actually, the fact is that both can
> be used for per-work tracing.
> 
> Actually my previous mails were focused on statistical tracing.
> 
> You proposed something that would result in the following final view:
> 
> 	workqueue_name:pid	n_inserted n_executed cpu max_latency
> 
> And then by looking at the trace file, we can retrieve the work/function
> that produced this max latency.
> 
> While I proposed this another idea:
> 
> 	workqueue_name:pid	n_inserted n_executed cpu
> 
> 	work1	latency_avg	latency_max
> 	work2	latency_avg	latency_max
> 	work3	latency_avg	latency_max
> 	.....
> 
> (We can have it with one file per workqueue).
> work1 can be either the work name or the function executed though
> the function is probably the main focus here because it's the
> real source culprit.
> But we can also output work_name:func
> 
> You see? With such output we see immediately which works are creating the
> worst latencies.
> And the event tracing is still very helpful here to have a more
> fine grained tracing and see the behaviour of some works more precisely.
> 
> That's a kind of tracing process we can imagine:
> 
> - we start by looking at the statistics and indentify the wicked
>   works/funcs.
> - we look at the events on /debug/tracing/trace and, coupling with
>   some well-chosen filters, we observe the behaviour of a work with
>   more precision.
> 
> 
> But I'm not opposite to your patch, I think it can be helpful to also
> have the work name on certain circumstances.
> But it makes the whole line a bit messy with a lot of informations for
> those who only need the func name (or only the work name).
> The best would be to have a runtime option to choose whether we want
> to display it or not.

I understand what you mean. thanks.
My conclusion is as follows:

Conclusion:
 1/4	resend, but remove __entry->work
 2/4	resend
 3/4	remake as your suggestion
 4/4	remake as your suggestion
 5/4	dropped

but unfortunately I don't have enough development time. Therefore,
Zhaolei and I discussed this issue and we agreed Zhaolei will develop it.


Thanks!





^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-15  6:13                     ` KOSAKI Motohiro
@ 2009-04-15  6:17                       ` Zhaolei
  2009-04-15  9:44                       ` Ingo Molnar
                                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-15  6:17 UTC (permalink / raw)
  To: KOSAKI Motohiro, Frederic Weisbecker
  Cc: kosaki.motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

* From: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
> Hi
> 
>> Kosaki-san,
>> 
>> Perhaps you misunderstood me, which is easy because my english is
>> evil ;-)
> 
> hehe, my english is poor much than you ;)
> 
> 
>> We have to distinguish event tracing and statistical/histogram tracing
>> here.
>> 
>> Event tracing is about logging the events when they come and store
>> them one by one to output them later. That's what does TRACE_EVENT
>> for instance.
>> 
>> Statistical tracing doesn't store a trace of each event but instead
>> update some numbers after each event: number of events, maximum
>> latency, average, etc...
> 
> Agreed.
> 
> 
>> About event tracing, we want to have something that let us identifying
>> the events individually. For the works it can be either the function
>> embedeed in the work, or the work itself.
>> But do we need both? Since it's rare that a function can be embedeed in
>> more than two works, I guess we only need one of those informations.
>> Which one is the more efficient to identify a work? That can be discussed
>> actually.
> 
> OK. I think function name is enough. I'll drop this patch.
> 
> And also function name has another benefit.
> symbol name is module unload safe. then we don't need to care
> module unloading.
> 
> In the other hand, work_struct variable is often static variable.
> it mean the variable name is often very short.
> 
> 
>> When I talked about per-work tracing, it was in a generic way. What do we
>> use to identify each work individually: either the function or the work
>> name? Both seems accurate for that actually, the fact is that both can
>> be used for per-work tracing.
>> 
>> Actually my previous mails were focused on statistical tracing.
>> 
>> You proposed something that would result in the following final view:
>> 
>> workqueue_name:pid n_inserted n_executed cpu max_latency
>> 
>> And then by looking at the trace file, we can retrieve the work/function
>> that produced this max latency.
>> 
>> While I proposed this another idea:
>> 
>> workqueue_name:pid n_inserted n_executed cpu
>> 
>> work1 latency_avg latency_max
>> work2 latency_avg latency_max
>> work3 latency_avg latency_max
>> .....
>> 
>> (We can have it with one file per workqueue).
>> work1 can be either the work name or the function executed though
>> the function is probably the main focus here because it's the
>> real source culprit.
>> But we can also output work_name:func
>> 
>> You see? With such output we see immediately which works are creating the
>> worst latencies.
>> And the event tracing is still very helpful here to have a more
>> fine grained tracing and see the behaviour of some works more precisely.
>> 
>> That's a kind of tracing process we can imagine:
>> 
>> - we start by looking at the statistics and indentify the wicked
>>   works/funcs.
>> - we look at the events on /debug/tracing/trace and, coupling with
>>   some well-chosen filters, we observe the behaviour of a work with
>>   more precision.
>> 
>> 
>> But I'm not opposite to your patch, I think it can be helpful to also
>> have the work name on certain circumstances.
>> But it makes the whole line a bit messy with a lot of informations for
>> those who only need the func name (or only the work name).
>> The best would be to have a runtime option to choose whether we want
>> to display it or not.
> 
> I understand you mean. thanks.
> My conclusion is follow,
> 
> Conclusion:
> 1/4 resend, but remove __entry->work
> 2/4 resend
> 3/4 remake as your suggestion
> 4/4 remake as your suggestion
> 5/4 dropped
> 
> but unfortunately I don't have enough development time. then,
> I and Zhaolei discuss this issue and we agreed Zaholei develop it.
Hello,

I will do it.

Thanks
Zhaolei
> 
> 
> Thanks!
> 
> 
> 
> 
> 
>ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-13 14:13   ` Frederic Weisbecker
@ 2009-04-15  6:47     ` KOSAKI Motohiro
  0 siblings, 0 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-15  6:47 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel

Hi

> > +	TP_fast_assign(
> > +		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
> 
> 
> 
> This is not about your patch but I see this pattern happen often. We should
> start think about a way to use tracing_record_cmdline for that.

I guess two movq instructions aren't more expensive than tracing_record_cmdline ;)


> > +	TP_printk("thread=%s:%d work=%p func=%p", __entry->thread_comm,
> 
> These pointer addresses are not very useful to display.
> We can zap work and display the func as a %pF.
> 
> Not related to your patch: only the name of the func is useful,
> not its asm offset. A format for raw func name without offset would
> start to become very handy. I remember Andrew talked about that by the past.
> 
> I wonder which format we could use for that. %pf ?

perhaps, static functions need the offset...





^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-15  6:13                     ` KOSAKI Motohiro
  2009-04-15  6:17                       ` Zhaolei
@ 2009-04-15  9:44                       ` Ingo Molnar
  2009-04-15 16:23                       ` Frederic Weisbecker
                                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 94+ messages in thread
From: Ingo Molnar @ 2009-04-15  9:44 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Frederic Weisbecker, Zhaolei, Steven Rostedt, Tom Zanussi,
	linux-kernel, Oleg Nesterov, Andrew Morton


* KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> > About event tracing, we want to have something that let us 
> > identifying the events individually. For the works it can be 
> > either the function embedded in the work, or the work itself. 
> > But do we need both? Since it's rare that a function can be 
> > embedded in more than two works, I guess we only need one of 
> > those informations. Which one is the more efficient to identify 
> > a work? That can be discussed actually.
> 
> OK. I think function name is enough. I'll drop this patch.

I think function name is what we generally use - and maybe that fits 
better into the function tracer concepts as well.

The 'work' (worklet) itself can often be non-symbolic: some 
dynamically allocated object embedded in another dynamically 
allocated object.

It might make sense to trace it (non-symbolically - just as a 
pointer value) - because to the work flow itself we can see what got 
started and where, and where it got finished, by looking at the work 
object itself.

In other words: the work function is the 'actor' or method, the 
worklet is the 'object'. They are two separate entities and tracing 
_both_ makes sense - as long as it's pervasive enough to understand 
the full life cycle of a worklet:

   worklet initialization (optional)
   queueing
   start of execution
   completion
   destruction/cancellation (optional)

It does not make sense to just trace bits of the life cycle.

Once we have these events instrumented, we can use them to build 
various derived metrics: average latency of queueing, average 
execution time, ratio of execution versus cancellation, etc. etc. We 
can build histograms that summarize information, or we can expose the
full raw and uncut trace history as well.

All of that builds on a complete capture of the essentials of a 
subsystem.

> And also function name has another benefit. symbol name is module 
> unload safe. then we don't need to care module unloading.
> 
> On the other hand, a work_struct variable is often a static variable. 
> It means the variable name is often very short.

That is true - but often a worklet is dynamic and then we'll have no 
symbol name to work with at all.

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-13 16:25               ` Frederic Weisbecker
@ 2009-04-15 10:22                 ` Oleg Nesterov
  2009-04-15 11:09                   ` Ingo Molnar
  0 siblings, 1 reply; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-15 10:22 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: KOSAKI Motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel

I am very sorry for the delay. I am going to study these patches later.

Just one note,

> On Mon, Apr 13, 2009 at 02:53:01PM +0900, KOSAKI Motohiro wrote:
>
> > Subject: [PATCH] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
> >
> > Entry/exit handler pair is useful common tracepoint technique. it can mesure handler consumption time.
...
> >  static void run_workqueue(struct cpu_workqueue_struct *cwq)
> >  {
> > @@ -282,7 +283,6 @@ static void run_workqueue(struct cpu_wor
> >  		 */
> >  		struct lockdep_map lockdep_map = work->lockdep_map;
> >  #endif
> > -		trace_workqueue_execution(cwq->thread, work);
> >  		cwq->current_work = work;
> >  		list_del_init(cwq->worklist.next);
> >  		spin_unlock_irq(&cwq->lock);
> > @@ -291,7 +291,9 @@ static void run_workqueue(struct cpu_wor
> >  		work_clear_pending(work);
> >  		lock_map_acquire(&cwq->wq->lockdep_map);
> >  		lock_map_acquire(&lockdep_map);
> > +		trace_workqueue_handler_entry(cwq->thread, work);
> >  		f(work);
> > +		trace_workqueue_handler_exit(cwq->thread, work);

This doesn't look right. We must not use "work" after f(work). work->func()
can kfree its work.

That is why we copy lockdep_map beforehand. Perhaps ftrace should do
something similar.

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-15 10:22                 ` Oleg Nesterov
@ 2009-04-15 11:09                   ` Ingo Molnar
  2009-04-15 11:33                     ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-15 11:09 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Frederic Weisbecker, KOSAKI Motohiro, Zhaolei, Steven Rostedt,
	Tom Zanussi, linux-kernel


* Oleg Nesterov <oleg@redhat.com> wrote:

> > >  		lock_map_acquire(&lockdep_map);
> > > +		trace_workqueue_handler_entry(cwq->thread, work);
> > >  		f(work);
> > > +		trace_workqueue_handler_exit(cwq->thread, work);
> 
> This doesn't look right. We must not use "work" after f(work). 
> work->func() can kfree its work.

We can use it as long as we use it as a 'cookie' - i.e. an 
identifier for visualization/statistics, but dont actually 
dereference it, right?

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-15 11:09                   ` Ingo Molnar
@ 2009-04-15 11:33                     ` Oleg Nesterov
  2009-04-15 11:45                       ` Ingo Molnar
  0 siblings, 1 reply; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-15 11:33 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frederic Weisbecker, KOSAKI Motohiro, Zhaolei, Steven Rostedt,
	Tom Zanussi, linux-kernel

On 04/15, Ingo Molnar wrote:
>
> * Oleg Nesterov <oleg@redhat.com> wrote:
>
> > > >  		lock_map_acquire(&lockdep_map);
> > > > +		trace_workqueue_handler_entry(cwq->thread, work);
> > > >  		f(work);
> > > > +		trace_workqueue_handler_exit(cwq->thread, work);
> >
> > This doesn't look right. We must not use "work" after f(work).
> > work->func() can kfree its work.
>
> We can use it as long as we use it as a 'cookie' - i.e. an
> identifier for visualization/statistics, but dont actually
> dereference it, right?

Yes sure.

I do not know whether this matters or not, but just in case...
Of course it is possible that, when trace_workqueue_handler_exit() runs,
this memory was already reused for another work even without kfree.
For example,

	void my_work_func(struct work_struct *work)
	{
		INIT_WORK(work, another_work_func);
		queue_work(another_workqueue, work);
	}

In this case another_workqueue can report trace_workqueue_handler_entry()
before my_work_func() returns.

This means that trace_workqueue_handler_exit() can't use the address
of work as a "key" to find some data recorded by _entry(). Unless this
data lives in cpu_workqueue_struct.

Not sure why I am trying to explain the things which are very obvious
to all ;) Just because I don't really understand what these patches do.

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-15 11:33                     ` Oleg Nesterov
@ 2009-04-15 11:45                       ` Ingo Molnar
  2009-04-15 12:05                         ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-15 11:45 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Frederic Weisbecker, KOSAKI Motohiro, Zhaolei, Steven Rostedt,
	Tom Zanussi, linux-kernel


* Oleg Nesterov <oleg@redhat.com> wrote:

> On 04/15, Ingo Molnar wrote:
> >
> > * Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > > > >  		lock_map_acquire(&lockdep_map);
> > > > > +		trace_workqueue_handler_entry(cwq->thread, work);
> > > > >  		f(work);
> > > > > +		trace_workqueue_handler_exit(cwq->thread, work);
> > >
> > > This doesn't look right. We must not use "work" after f(work).
> > > work->func() can kfree its work.
> >
> > We can use it as long as we use it as a 'cookie' - i.e. an
> > identifier for visualization/statistics, but dont actually
> > dereference it, right?
> 
> Yes sure.
> 
> I do not know whether this matters or not, but just in case... Of 
> course it is possible that, when trace_workqueue_handler_exit() 
> runs, this memory was already reused for another work even without 
> kfree. For example,
> 
> 	void my_work_func(struct work_struct *work)
> 	{
> 		INIT_WORK(work, another_work_func);
> 		queue_work(another_workqueue, work);
> 	}
> 
> In this case another_workqueue can report 
> trace_workqueue_handler_entry() before my_work_func() returns.
> 
> This means that trace_workqueue_handler_exit() can't use the 
> address of work as a "key" to find some data recorded by _entry(). 
> Unless this data lives in cpu_workqueue_struct.
> 
> Not sure why I am trying to explain the things which are very 
> obvious to all ;) Just because I don't really understand what 
> these patches do.

The patches try to map and instrument the life cycle of a worklet, 
and the main actions that occur in the workqueue subsystem in 
general.

The purpose is instrumentation: for debugging purposes, for 
improving kernel code and for just understanding how the system 
functions and what its dynamic actions are.

In that sense the worklet 'key' possibly getting reallocated and 
reused before the 'completed' action was traced is probably not a 
big deal - but tools have to be aware of it possibly happening (and 
must not hard-code any assumption to the contrary).

Plus the exit handler must not dereference the worklet either. 
Safest would be to make this sure in the prototype already: pass in 
a void * key, not a work structure.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-15 11:45                       ` Ingo Molnar
@ 2009-04-15 12:05                         ` Oleg Nesterov
  0 siblings, 0 replies; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-15 12:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frederic Weisbecker, KOSAKI Motohiro, Zhaolei, Steven Rostedt,
	Tom Zanussi, linux-kernel

On 04/15, Ingo Molnar wrote:
>
> The patches try to map and instrument the life cycle of a worklet,
> and the main actions that occur in the workqueue subsystem in
> general.
>
> The purpose is instrumentation: for debugging purposes, for
> improving kernel code and for just understanding how the system
> functions and what its dynamic actions are.
>
> In that sense the worklet 'key' possibly getting reallocated and
> reused before the 'completed' action was traced is probably not a
> big deal - but tools have to be aware of it possibly happening (and
> most not hard-code any assumption to the contrary).
>
> Plus the exit handler must not dereference the worklet either.
> Safest would be to make this sure in the prototype already: pass in
> a void * key, not a work structure.

Yes, I think you are right.

And thanks for your explanations!

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v2 5/4] ftrace, workqueuetrace: display work name
  2009-04-15  6:13                     ` KOSAKI Motohiro
  2009-04-15  6:17                       ` Zhaolei
  2009-04-15  9:44                       ` Ingo Molnar
@ 2009-04-15 16:23                       ` Frederic Weisbecker
  2009-04-17  6:56                       ` [PATCH v3 0/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
  4 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-15 16:23 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Zhaolei, Steven Rostedt, Tom Zanussi, Ingo Molnar, linux-kernel,
	Oleg Nesterov, Andrew Morton

On Wed, Apr 15, 2009 at 03:13:17PM +0900, KOSAKI Motohiro wrote:
> Hi
> 
> > Kosaki-san,
> > 
> > Perhaps you misunderstood me, which is easy because my english is
> > evil ;-)
> 
hehe, my english is much poorer than yours ;)
> 
> 
> > We have to distinguish event tracing and statistical/histogram tracing
> > here.
> > 
> > Event tracing is about logging the events when they come and store
> > them one by one to output them later. That's what does TRACE_EVENT
> > for instance.
> > 
> > Statistical tracing doesn't store a trace of each event but instead
> > update some numbers after each event: number of events, maximum
> > latency, average, etc...
> 
> Agreed.
> 
> 
> > About event tracing, we want to have something that let us identifying
> > the events individually. For the works it can be either the function
> > embedeed in the work, or the work itself.
> > But do we need both? Since it's rare that a function can be embedeed in
> > more than two works, I guess we only need one of those informations.
> > Which one is the more efficient to identify a work? That can be discussed
> > actually.
> 
> OK. I think function name is enough. I'll drop this patch.
> 
> And also function name has another benefit.
> symbol name is module unload safe. then we don't need to care
> module unloading.
> 
> On the other hand, a work_struct variable is often a static variable.
> It means the variable name is often very short.
> 
> 
> > When I talked about per-work tracing, it was in a generic way. What do we
> > use to identify each work individually: either the function or the work
> > name? Both seems accurate for that actually, the fact is that both can
> > be used for per-work tracing.
> > 
> > Actually my previous mails were focused on statistical tracing.
> > 
> > You proposed something that would result in the following final view:
> > 
> > 	workqueue_name:pid	n_inserted n_executed cpu max_latency
> > 
> > And then by looking at the trace file, we can retrieve the work/function
> > that produced this max latency.
> > 
> > While I proposed this another idea:
> > 
> > 	workqueue_name:pid	n_inserted n_executed cpu
> > 
> > 	work1	latency_avg	latency_max
> > 	work2	latency_avg	latency_max
> > 	work3	latency_avg	latency_max
> > 	.....
> > 
> > (We can have it with one file per workqueue).
> > work1 can be either the work name or the function executed though
> > the function is probably the main focus here because it's the
> > real source culprit.
> > But we can also output work_name:func
> > 
> > You see? With such output we see immediately which works are creating the
> > worst latencies.
> > And the event tracing is still very helpful here to have a more
> > fine grained tracing and see the behaviour of some works more precisely.
> > 
> > That's a kind of tracing process we can imagine:
> > 
> > - we start by looking at the statistics and identify the wicked
> >   works/funcs.
> > - we look at the events on /debug/tracing/trace and, coupling with
> >   some well-chosen filters, we observe the behaviour of a work with
> >   more precision.
> > 
> > 
> > But I'm not opposite to your patch, I think it can be helpful to also
> > have the work name on certain circumstances.
> > But it makes the whole line a bit messy with a lot of informations for
> > those who only need the func name (or only the work name).
> > The best would be to have a runtime option to choose whether we want
> > to display it or not.
> 
> I understand what you mean. Thanks.
> My conclusion is as follows,
> 
> Conclusion:
>  1/4	resend, but remove __entry->work
>  2/4	resend
>  3/4	remake as your suggestion
>  4/4	remake as your suggestion
>  5/4	dropped
> 
> but unfortunately I don't have enough development time. then,
> I and Zhaolei discussed this issue and we agreed Zhaolei will develop it.
> 
> 
> Thanks!
> 


Ok, thank you!


^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v3 0/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-15  6:13                     ` KOSAKI Motohiro
                                         ` (2 preceding siblings ...)
  2009-04-15 16:23                       ` Frederic Weisbecker
@ 2009-04-17  6:56                       ` Zhaolei
  2009-04-17  7:15                         ` [PATCH v3 1/1] " Zhaolei
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
  4 siblings, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-17  6:56 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Frederic Weisbecker, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

KOSAKI Motohiro wrote:
...
> I understand what you mean. Thanks.
> My conclusion is as follows,
> 
> Conclusion:
>  1/4	resend, but remove __entry->work
>  2/4	resend
>  3/4	remake as your suggestion
>  4/4	remake as your suggestion
>  5/4	dropped
> 
> but unfortunately I don't have enough development time. then,
> I and Zhaolei discussed this issue and we agreed Zhaolei will develop it.
Hi,

I decided to send 1/4 as a separate patch first.
2/4~4/4 will be a patchset; it will be sent after finishing coding and testing.

Thanks
Zhaolei

> 
> Thanks!
> 
> 
> 
> 
> 



^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-17  6:56                       ` [PATCH v3 0/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
@ 2009-04-17  7:15                         ` Zhaolei
  2009-04-17 13:45                           ` Ingo Molnar
  2009-04-20 17:11                           ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
  0 siblings, 2 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-17  7:15 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Frederic Weisbecker, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format
    introduced by Steven Rostedt: consolidate trace and trace_event headers
v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap
    the work addr
v1: zhaolei@cn.fujitsu.com: Make workqueue tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Then, this patch convert DEFINE_TRACE to TRACE_EVENT in workqueue related tracepoint.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 include/trace/events/workqueue.h |  100 ++++++++++++++++++++++++++++++++++++++
 include/trace/workqueue.h        |   25 ---------
 kernel/trace/trace_workqueue.c   |    2 +-
 kernel/workqueue.c               |   11 +---
 4 files changed, 103 insertions(+), 35 deletions(-)
 create mode 100644 include/trace/events/workqueue.h
 delete mode 100644 include/trace/workqueue.h

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 0000000..035f1bf
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523..0000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 984b917..cfe56d3 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a..0668795 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 	return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-17  7:15                         ` [PATCH v3 1/1] " Zhaolei
@ 2009-04-17 13:45                           ` Ingo Molnar
  2009-04-20  1:30                             ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints " Zhaolei
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
  2009-04-20 17:11                           ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
  1 sibling, 2 replies; 94+ messages in thread
From: Ingo Molnar @ 2009-04-17 13:45 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton


* Zhaolei <zhaolei@cn.fujitsu.com> wrote:

> +TRACE_EVENT(workqueue_insertion,
> +TRACE_EVENT(workqueue_execution,
> +TRACE_EVENT(workqueue_creation,
> +TRACE_EVENT(workqueue_destruction,

I'm missing all the worklet tracepoints i suggested. (and i think 
which you had in earlier versions)

Basically, i'd suggest the following complete set of events instead:

 TRACE_EVENT(workqueue_create
 TRACE_EVENT(workqueue_flush              /* NEW */
 TRACE_EVENT(workqueue_destroy

 TRACE_EVENT(worklet_enqueue              /* NEW */
 TRACE_EVENT(worklet_enqueue_delayed      /* NEW */

 TRACE_EVENT(worklet_execute              /* instead of workqueue_execution */
 TRACE_EVENT(worklet_complete             /* NEW */

 TRACE_EVENT(worklet_cancel               /* NEW */

This allows the understanding of the life cycle of a workqueue and 
of worklets that enter that workqueue. Note the distinction between 
workqueue and worklet (work) - that is essential.

The parameters of the events are obvious, with one detail: i'd 
suggest a 'cpu' parameter to the enqueue events, to allow the 
mapping of the _on(..cpu) variants too.

I would not bother with schedule_on_each_cpu() instrumentation - 
it's rarely used.

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-17 13:45                           ` Ingo Molnar
@ 2009-04-20  1:30                             ` Zhaolei
  2009-04-20  1:38                               ` KOSAKI Motohiro
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
  1 sibling, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-20  1:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton

* From: "Ingo Molnar" <mingo@elte.hu>
> 
> * Zhaolei <zhaolei@cn.fujitsu.com> wrote:
> 
>> +TRACE_EVENT(workqueue_insertion,
>> +TRACE_EVENT(workqueue_execution,
>> +TRACE_EVENT(workqueue_creation,
>> +TRACE_EVENT(workqueue_destruction,
> 
> I'm missing all the worklet tracepoints i suggested. (and i think 
> which you had in earlier versions)
> 
> Basically, i'd suggest the following complete set of events instead:
> 
> TRACE_EVENT(workqueue_create
> TRACE_EVENT(workqueue_flush              /* NEW */
> TRACE_EVENT(workqueue_destroy
> 
> TRACE_EVENT(worklet_enqueue              /* NEW */
> TRACE_EVENT(worklet_enqueue_delayed      /* NEW */
> 
> TRACE_EVENT(worklet_execute              /* instead of workqueue_execution */
> TRACE_EVENT(worklet_complete             /* NEW */
> 
> TRACE_EVENT(worklet_cancel               /* NEW */
> 
> This allows the understanding of the life cycle of a workqueue and 
> of worklets that enter that workqueue. Note the distinction between 
> workqueue and worklet (work) - that is essential.
Hello, Ingo

Thanks for your suggest.
I read distinction between workqueue and worklet.
In my schedule, this patch is first step of our target, my image of steps is:
  1: Move current workqueue tracepoints into TRACE_EVENT (this patch)
  2: Make workqueuetrace support per-worklet output (doing)
  3: Add time information to workqueuetrace's worklet stat (need above new TRACEPOINT)

So, i prepared to add new worklet tracepoints in step3.
What's your opinion?

Thanks
Zhaolei
> 
> The parameters of the events are obvious, with one detail: i'd 
> suggest a 'cpu' parameter to the enqueue events, to allow the 
> mapping of the _on(..cpu) variants too.
> 
> I would not bother with schedule_on_each_cpu() instrumentation - 
> it's rarely used.
> 
> Ingo
> 
>ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20  1:30                             ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints " Zhaolei
@ 2009-04-20  1:38                               ` KOSAKI Motohiro
  2009-04-20  1:43                                 ` Zhaolei
  0 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-20  1:38 UTC (permalink / raw)
  To: Zhaolei
  Cc: kosaki.motohiro, Ingo Molnar, Frederic Weisbecker,
	Steven Rostedt, Tom Zanussi, linux-kernel, Oleg Nesterov,
	Andrew Morton

> Thanks for your suggest.
> I read distinction between workqueue and worklet.
> In my schedule, this patch is first step of our target, my image of steps is:
>   1: Move current workqueuetracepoints into TRACEEVENT (this patch)
>   2: Make workqueuetrace support per-worklet output (doing)
>   3: Add time information to workqueuetrace's worklet stat (need above new TRACEPOINT)
> 
> So, i prepared to add new worklet tracepoints in step3.
> What's your opinion?

Zhao-san, Maintainer's easy reviewability is very important.
Can you switch 2 and 3 ?

That way, Ingo can directly review whether the patch fulfills his request.



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20  1:38                               ` KOSAKI Motohiro
@ 2009-04-20  1:43                                 ` Zhaolei
  2009-04-20  1:49                                   ` KOSAKI Motohiro
  0 siblings, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-20  1:43 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, Ingo Molnar, Frederic Weisbecker,
	Steven Rostedt, Tom Zanussi, linux-kernel, Oleg Nesterov,
	Andrew Morton

* From: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
>> Thanks for your suggest.
>> I read distinction between workqueue and worklet.
>> In my schedule, this patch is first step of our target, my image of steps is:
>>   1: Move current workqueuetracepoints into TRACEEVENT (this patch)
>>   2: Make workqueuetrace support per-worklet output (doing)
>>   3: Add time information to workqueuetrace's worklet stat (need above new TRACEPOINT)
>> 
>> So, i prepared to add new worklet tracepoints in step3.
>> What's your opinion?
> 

Hello, Kosaki-san

> Zhao-san, Maintainer's easy reviewability is very important.
I agree it.

> Can you switch 2 and 3 ?
Sorry for my poor English.
I mean that step 2 adds the worklet stat and step 3 adds more information to the worklet stat;
since 3 is based on 2, switching 2 and 3 would mean merging 2 and 3.

Thanks
Zhaolei
> 
> it cause Ingo can review the patch fill his request or not directly.
> 
> 
> 
>ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20  1:43                                 ` Zhaolei
@ 2009-04-20  1:49                                   ` KOSAKI Motohiro
  2009-04-20  8:46                                     ` Ingo Molnar
  0 siblings, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-20  1:49 UTC (permalink / raw)
  To: Zhaolei
  Cc: kosaki.motohiro, Ingo Molnar, Frederic Weisbecker,
	Steven Rostedt, Tom Zanussi, linux-kernel, Oleg Nesterov,
	Andrew Morton

> * From: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
> >> Thanks for your suggest.
> >> I read distinction between workqueue and worklet.
> >> In my schedule, this patch is first step of our target, my image of steps is:
> >>   1: Move current workqueuetracepoints into TRACEEVENT (this patch)
> >>   2: Make workqueuetrace support per-worklet output (doing)
> >>   3: Add time information to workqueuetrace's worklet stat (need above new TRACEPOINT)
> >> 
> >> So, i prepared to add new worklet tracepoints in step3.
> >> What's your opinion?
> > 
> 
> Hello, Kosaki-san
> 
> > Zhao-san, Maintainer's easy reviewability is very important.
> I agree it.
> 
> > Can you switch 2 and 3 ?
> Sorry for my poor English.
> I means 2 is to add worklet stat, 3 is to add more information to worklet stat,
> 3 is based on 2, switch 2 and 3 means merge 2 and 3.

OK.

Thus, I recommend postponing 2 and 3, and changing 1 as Ingo suggested.
If not, reviewers might forget this thread.




^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame
  2009-04-15  6:13                     ` KOSAKI Motohiro
                                         ` (3 preceding siblings ...)
  2009-04-17  6:56                       ` [PATCH v3 0/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
@ 2009-04-20  6:42                       ` Zhaolei
  2009-04-20  6:58                         ` [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe() Zhaolei
                                           ` (4 more replies)
  4 siblings, 5 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-20  6:42 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Frederic Weisbecker, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

Hello,

To make the workqueue tracer support per-worklet time measurement (max execution
time, latency), the workqueue tracer needs to support per-worklet stat information first.
This patchset adds it.

Before patch:
# cat [debugfs]/tracing/trace_stat/workqueues
# CPU  INSERTED  EXECUTED   NAME
# |      |         |          |

  0    589        589       events/0
  0      0          0       cpuset
  0    656        656       khelper
  0      0          0       netns
  0    809        809       kblockd/0
  0      0          0       kacpid
  0      0          0       kacpi_notify
  0      0          0       ata/0
  0      0          0       ata_aux
  0      0          0       ksuspend_usbd
  0      0          0       aio/0
  0      0          0       nfsiod
  0      0          0       crypto/0
  0      0          0       kpsmoused
  0      0          0       kstriped
  0      0          0       kondemand/0
  0      0          0       usbhid_resumer
  0     21         21       rpciod/0

After patch:
# cat [debugfs]/tracing/trace_stat/workqueues
# CPU  INSERTED  EXECUTED   TASKNAME:PID
# |      |         |        `-WORKFUNC
# |      |         |          |
  0    273        273       events/0:5
  0      1          1        |-reg_todo+0x0/0x648
  0      4          4        |-check_corruption+0x0/0x2c
  0      1          1        |-power_supply_changed_work+0x0/0x3c
  0      6          6        |-console_callback+0x0/0x145
  0    229        229        |-vmstat_update+0x0/0x33
  0     12         12        |-do_cache_clean+0x0/0x6a
  0      1          1        |-wq_barrier_func+0x0/0x12
  0     15         15        |-mce_work_fn+0x0/0x31
  0      3          3        |-rt_worker_func+0x0/0x2de
  0      1          1        `-grace_ender+0x0/0x14
  0      0          0       cpuset:6
  0    659        659       khelper:7
  0    659        659        `-__call_usermodehelper+0x0/0xa0
  0      0          0       netns:11
  0   1818       1818       kblockd/0:13
  0   1115       1115        |-blk_unplug_work+0x0/0x1e
  0    703        703        `-cfq_kick_queue+0x0/0x2f
  0      0          0       kacpid:412
  0      0          0       kacpi_notify:415
  0      0          0       ata/0:463
  0      0          0       ata_aux:509
  0      0          0       ksuspend_usbd:515
  0      0          0       aio/0:717
  0      0          0       nfsiod:764
  0      0          0       crypto/0:785
  0      0          0       kpsmoused:1141
  0      0          0       kstriped:1248
  0      0          0       kondemand/0:1259
  0      0          0       usbhid_resumer:1264
  0     20         20       rpciod/0:1333
  0      1          1        |-xs_tcp_connect_worker4+0x0/0x1b
  0     19         19        `-xs_udp_connect_worker4+0x0/0x130

After this step, we'll begin to add worklet time measurement support to
the workqueue tracer.

Thanks
Zhaolei


^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe()
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
@ 2009-04-20  6:58                         ` Zhaolei
  2009-04-20  7:26                           ` Frederic Weisbecker
  2009-04-20  6:59                         ` [PATCH 2/4] trace_workqueue: Remove cpu_workqueue_stats->first_entry Zhaolei
                                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-20  6:58 UTC (permalink / raw)
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, Ingo Molnar, linux-kernel, Oleg Nesterov,
	Andrew Morton

There is no need to use list_for_each_entry_safe() for an iteration that does not
delete nodes; use list_for_each_entry() instead.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 kernel/trace/trace_workqueue.c |   10 ++++------
 1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 984b917..934b27c 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -47,12 +47,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
 			goto found;
@@ -69,12 +68,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
-	struct cpu_workqueue_stats *node, *next;
+	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
 			goto found;
-- 
1.5.5.3




^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 2/4] trace_workqueue: Remove cpu_workqueue_stats->first_entry
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
  2009-04-20  6:58                         ` [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe() Zhaolei
@ 2009-04-20  6:59                         ` Zhaolei
  2009-04-20  7:02                         ` [PATCH 3/4] trace_workqueue: Remove blank line between each cpu Zhaolei
                                           ` (2 subsequent siblings)
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-20  6:59 UTC (permalink / raw)
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, Ingo Molnar, linux-kernel, Oleg Nesterov,
	Andrew Morton

cpu_workqueue_stats->first_entry is unnecessary, because we can detect the first
entry of a cpu's workqueue list with:
if (&cpu_workqueue_stats->list == workqueue_cpu_stat(cpu)->list.next)

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 kernel/trace/trace_workqueue.c |    4 ----
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 934b27c..f52db53 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -16,8 +16,6 @@
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head            list;
-/* Useful to know if we print the cpu headers */
-	bool		            first_entry;
 	int		            cpu;
 	pid_t			    pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
@@ -103,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 	cws->pid = wq_thread->pid;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (list_empty(&workqueue_cpu_stat(cpu)->list))
-		cws->first_entry = true;
 	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 3/4] trace_workqueue: Remove blank line between each cpu
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
  2009-04-20  6:58                         ` [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe() Zhaolei
  2009-04-20  6:59                         ` [PATCH 2/4] trace_workqueue: Remove cpu_workqueue_stats->first_entry Zhaolei
@ 2009-04-20  7:02                         ` Zhaolei
  2009-04-20  7:09                         ` [PATCH 4/4] trace_workqueue: Add worklet information Zhaolei
  2009-04-20  7:23                         ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Frederic Weisbecker
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-20  7:02 UTC (permalink / raw)
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, Ingo Molnar, linux-kernel, Oleg Nesterov,
	Andrew Morton

A blank line between each cpu's workqueue stats is not necessary, because the cpu
number is enough to tell them apart by eye.
The old style also produced a blank line below the header line, and made the code
more complex by taking a lock, disabling irqs, and reading the per-cpu variable.

Old style:
 # CPU  INSERTED  EXECUTED   NAME
 # |      |         |          |

   0   8644       8644       events/0
   0      0          0       cpuset
   ...
   0      1          1       kdmflush

   1  35365      35365       events/1
   ...

New style:
 # CPU  INSERTED  EXECUTED   NAME
 # |      |         |          |

   0   8644       8644       events/0
   0      0          0       cpuset
   ...
   0      1          1       kdmflush
   1  35365      35365       events/1
   ...

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 kernel/trace/trace_workqueue.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index f52db53..3393c95 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -185,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx)
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
 	struct cpu_workqueue_stats *cws = p;
-	unsigned long flags;
-	int cpu = cws->cpu;
 	struct pid *pid;
 	struct task_struct *tsk;
 
-	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
-		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
 	pid = find_get_pid(cws->pid);
 	if (pid) {
 		tsk = get_pid_task(pid, PIDTYPE_PID);
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 4/4] trace_workqueue: Add worklet information
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
                                           ` (2 preceding siblings ...)
  2009-04-20  7:02                         ` [PATCH 3/4] trace_workqueue: Remove blank line between each cpu Zhaolei
@ 2009-04-20  7:09                         ` Zhaolei
  2009-04-20 11:36                           ` Frederic Weisbecker
  2009-04-20  7:23                         ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Frederic Weisbecker
  4 siblings, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-20  7:09 UTC (permalink / raw)
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, Ingo Molnar, linux-kernel, Oleg Nesterov,
	Andrew Morton

Now the workqueue tracer can display per-worklet information as follows:
 # cat [debugfs]/tracing/trace_stat/workqueues
 # CPU  INSERTED  EXECUTED   TASKNAME:PID
 # |      |         |        `-WORKFUNC
 # |      |         |          |
 CPU  INSERTED  EXECUTED   TASKNAME:PID
   0    157        157       events/0:5
   0      1          1        |-reg_todo+0x0/0x648
   0      3          3        |-check_corruption+0x0/0x2c
   0      1          1        |-power_supply_changed_work+0x0/0x3c
   0      2          2        |-console_callback+0x0/0x145
   0      9          9        |-do_cache_clean+0x0/0x6a
   0      1          1        |-wq_barrier_func+0x0/0x12
   0    129        129        |-vmstat_update+0x0/0x33
   0      8          8        |-mce_work_fn+0x0/0x31
   0      1          1        |-grace_ender+0x0/0x14
   0      2          2        `-rt_worker_func+0x0/0x2de
   0      0          0       cpuset:6
   0    656        656       khelper:7
   0    656        656        `-__call_usermodehelper+0x0/0xa0
   0      0          0       netns:11
   0    592        592       kblockd/0:13
   0     39         39        |-blk_unplug_work+0x0/0x1e
   0    553        553        `-cfq_kick_queue+0x0/0x2f
   0      0          0       kacpid:412
   0      0          0       kacpi_notify:415
   0      0          0       ata/0:463
   0      0          0       ata_aux:509
   0      0          0       ksuspend_usbd:515
   0      0          0       aio/0:717
   0      0          0       nfsiod:718
   0      0          0       crypto/0:719
   0      0          0       kpsmoused:1141
   0      0          0       kstriped:1248
   0      0          0       kondemand/0:1259
   0      0          0       usbhid_resumer:1260
   0     21         21       rpciod/0:1333
   0      1          1        |-xs_tcp_connect_worker4+0x0/0x1b
   0     19         19        |-xs_udp_connect_worker4+0x0/0x130
   0      1          1        `-xprt_autoclose+0x0/0x2c

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 kernel/trace/trace_workqueue.c |  219 ++++++++++++++++++++++++++++++++--------
 1 files changed, 175 insertions(+), 44 deletions(-)

diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 3393c95..c465dbc 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -12,19 +12,33 @@
 #include "trace_stat.h"
 #include "trace.h"
 
+/* A callback function saved in struct work_struct->func */
+struct cpu_workqueue_stats;
+struct workfunc_stats {
+	struct list_head		list;
+	struct cpu_workqueue_stats	*parent;
+
+	/* Addr of worklet's callback function */
+	work_func_t			func;
+	/* Can be inserted from interrupt or user context, need to be atomic */
+	atomic_t			inserted;
+	/*
+	 * Don't need to be atomic, works are serialized in a single workqueue
+	 * thread on a single CPU.
+	 */
+	unsigned int			executed;
+};
 
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
 	struct list_head            list;
 	int		            cpu;
 	pid_t			    pid;
-/* Can be inserted from interrupt or user context, need to be atomic */
+	/* Refer to comments of struct workfunc_stats for detail */
 	atomic_t	            inserted;
-/*
- *  Don't need to be atomic, works are serialized in a single workqueue thread
- *  on a single CPU.
- */
 	unsigned int		    executed;
+	/* list of struct workfunc_stats in this workqueue */
+	struct list_head            workfunclist;
 };
 
 /* List of workqueue threads on one cpu */
@@ -39,6 +53,38 @@ struct workqueue_global_stats {
 static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
 #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
 
+/*
+ * Update record when insert a work into workqueue
+ * Caller need to hold cpu_workqueue_stats spin_lock
+ */
+int do_workqueue_insertion(struct cpu_workqueue_stats *cws,
+			   struct work_struct *work)
+{
+	struct workfunc_stats *wfstat;
+
+	atomic_inc(&cws->inserted);
+
+	list_for_each_entry(wfstat, &cws->workfunclist, list)
+		if (wfstat->func == work->func)
+			goto found;
+
+	/* not found, first insert */
+
+	/* work can be inserted from interrupt */
+	wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
+	if (!wfstat)
+		return -ENOMEM;
+
+	wfstat->parent = cws;
+	wfstat->func = work->func;
+
+	list_add_tail(&wfstat->list, &cws->workfunclist);
+found:
+	atomic_inc(&wfstat->inserted);
+
+	return 0;
+}
+
 /* Insertion of a work */
 static void
 probe_workqueue_insertion(struct task_struct *wq_thread,
@@ -51,7 +97,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
-			atomic_inc(&node->inserted);
+			/* we ignore error of do_workqueue_insertion */
+			do_workqueue_insertion(node, work);
 			goto found;
 		}
 	}
@@ -67,17 +114,30 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node;
+	struct workfunc_stats *wfnode;
 	unsigned long flags;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
+
+	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list)
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
-			goto found;
+			goto found_wq;
 		}
-	}
-	pr_debug("trace_workqueue: entry not found\n");
-found:
+	pr_debug("trace_workqueue: workqueue not found\n");
+	goto end;
+
+found_wq:
+	list_for_each_entry(wfnode, &node->workfunclist, list)
+		if (wfnode->func == work->func) {
+			wfnode->executed++;
+			goto found_wf;
+		}
+	pr_debug("trace_workqueue: worklet not found\n");
+	goto end;
+
+found_wf:
+end:
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
@@ -85,6 +145,7 @@ found:
 static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 {
 	struct cpu_workqueue_stats *cws;
+	struct workfunc_stats *wfstat;
 	unsigned long flags;
 
 	WARN_ON(cpu < 0);
@@ -93,16 +154,37 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 	cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
 	if (!cws) {
 		pr_warning("trace_workqueue: not enough memory\n");
-		return;
+		goto err_alloc_cws;
 	}
 	INIT_LIST_HEAD(&cws->list);
 	cws->cpu = cpu;
 
 	cws->pid = wq_thread->pid;
+	INIT_LIST_HEAD(&cws->workfunclist);
+
+	/*
+	 * add a dummy node to cpu_workqueue_stats->workfunclist to:
+	 * 1: help print function to print header of work(ex, workqueue info)
+	 * 2: used as a seq iter if no work exist in workqueue
+	 */
+	wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
+	if (!wfstat) {
+		pr_warning("trace_workqueue: not enough memory\n");
+		goto err_alloc_wfstat;
+	}
+	wfstat->parent = cws;
+	list_add_tail(&wfstat->list, &cws->workfunclist);
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
+	return;
+
+err_alloc_wfstat:
+	kfree(cws);
+err_alloc_cws:
+	return;
 }
 
 /* Destruction of a cpu workqueue thread */
@@ -115,12 +197,22 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-							list) {
-		if (node->pid == wq_thread->pid) {
-			list_del(&node->list);
-			kfree(node);
-			goto found;
+				 list) {
+		struct workfunc_stats *wfstat, *wfstatnext;
+
+		if (node->pid != wq_thread->pid)
+			continue;
+
+		list_for_each_entry_safe(wfstat, wfstatnext,
+					 &node->workfunclist, list) {
+			list_del(&wfstat->list);
+			kfree(wfstat);
 		}
+
+		list_del(&node->list);
+		kfree(node);
+
+		goto found;
 	}
 
 	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
@@ -129,17 +221,23 @@ found:
 
 }
 
-static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
+static struct workfunc_stats *workqueue_stat_start_cpu(int cpu)
 {
 	unsigned long flags;
-	struct cpu_workqueue_stats *ret = NULL;
-
+	struct workfunc_stats *ret = NULL;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 
-	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
-		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
-				 struct cpu_workqueue_stats, list);
+	if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
+		struct cpu_workqueue_stats *cws;
+		cws = list_entry(workqueue_cpu_stat(cpu)->list.next,
+			struct cpu_workqueue_stats, list);
+		/*
+		 * cpu_workqueue_stats->workfunclist at least have a dummy node
+		 */
+		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
+			list);
+	}
 
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
@@ -161,43 +259,75 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
 
 static void *workqueue_stat_next(void *prev, int idx)
 {
-	struct cpu_workqueue_stats *prev_cws = prev;
-	int cpu = prev_cws->cpu;
+	struct workfunc_stats *prev_wfstat = prev;
+	int cpu = prev_wfstat->parent->cpu;
 	unsigned long flags;
 	void *ret = NULL;
 
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
+
+	if (!list_is_last(&prev_wfstat->list,
+	    &prev_wfstat->parent->workfunclist)) {
+		ret = list_entry(prev_wfstat->list.next, struct workfunc_stats,
+			list);
 		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-		do {
-			cpu = cpumask_next(cpu, cpu_possible_mask);
-			if (cpu >= nr_cpu_ids)
-				return NULL;
-		} while (!(ret = workqueue_stat_start_cpu(cpu)));
 		return ret;
 	}
+
+	if (!list_is_last(&prev_wfstat->parent->list,
+	    &workqueue_cpu_stat(cpu)->list)) {
+		struct cpu_workqueue_stats *cws = list_entry(
+			prev_wfstat->parent->list.next,
+			struct cpu_workqueue_stats, list);
+		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
+			list);
+		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+		return ret;
+	}
+
 	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
-	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
-			  list);
+	do {
+		cpu = cpumask_next(cpu, cpu_possible_mask);
+		if (cpu >= nr_cpu_ids)
+			return NULL;
+	} while (!(ret = workqueue_stat_start_cpu(cpu)));
+
+	return ret;
 }
 
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
-	struct cpu_workqueue_stats *cws = p;
+	struct workfunc_stats *wfstat = p;
+	struct cpu_workqueue_stats *cws = wfstat->parent;
 	struct pid *pid;
 	struct task_struct *tsk;
 
-	pid = find_get_pid(cws->pid);
-	if (pid) {
-		tsk = get_pid_task(pid, PIDTYPE_PID);
-		if (tsk) {
-			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
-				   atomic_read(&cws->inserted), cws->executed,
-				   tsk->comm);
-			put_task_struct(tsk);
+	if (!wfstat->func) {
+		/* It is first dummy node, need to print workqueue info */
+		pid = find_get_pid(cws->pid);
+		if (pid) {
+			tsk = get_pid_task(pid, PIDTYPE_PID);
+			if (tsk) {
+				seq_printf(s, "%3d %6d     %6u       %s:%d\n",
+					   cws->cpu,
+					   atomic_read(&cws->inserted),
+					   cws->executed,
+					   tsk->comm,
+					   cws->pid);
+				put_task_struct(tsk);
+			}
+			put_pid(pid);
 		}
-		put_pid(pid);
+	} else {
+		/* It is effect node, need to print workfunc info */
+		int lastwf = list_is_last(&wfstat->list, &cws->workfunclist);
+		seq_printf(s, "%3d %6d     %6u        %c-%pF\n",
+			cws->cpu,
+			atomic_read(&wfstat->inserted),
+			wfstat->executed,
+			lastwf ? '`' : '|',
+			wfstat->func);
 	}
 
 	return 0;
@@ -205,7 +335,8 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
 
 static int workqueue_stat_headers(struct seq_file *s)
 {
-	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
+	seq_printf(s, "# CPU  INSERTED  EXECUTED   TASKNAME:PID\n");
+	seq_printf(s, "# |      |         |        `-WORKFUNC\n");
 	seq_printf(s, "# |      |         |          |\n");
 	return 0;
 }
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame
  2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
                                           ` (3 preceding siblings ...)
  2009-04-20  7:09                         ` [PATCH 4/4] trace_workqueue: Add worklet information Zhaolei
@ 2009-04-20  7:23                         ` Frederic Weisbecker
  4 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-20  7:23 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

On Mon, Apr 20, 2009 at 02:42:11PM +0800, Zhaolei wrote:
> Hello,
> 
> To make workqueuetrace support per-worklet's time mesurement(max execution,
> latecy), workqueue trace need to support per-worklet's stat information first.
> This patchset add them.
> 
> Before patch:
> # cat [debugfs]/tracing/trace_stat/workqueues
> # CPU  INSERTED  EXECUTED   NAME
> # |      |         |          |
> 
>   0    589        589       events/0
>   0      0          0       cpuset
>   0    656        656       khelper
>   0      0          0       netns
>   0    809        809       kblockd/0
>   0      0          0       kacpid
>   0      0          0       kacpi_notify
>   0      0          0       ata/0
>   0      0          0       ata_aux
>   0      0          0       ksuspend_usbd
>   0      0          0       aio/0
>   0      0          0       nfsiod
>   0      0          0       crypto/0
>   0      0          0       kpsmoused
>   0      0          0       kstriped
>   0      0          0       kondemand/0
>   0      0          0       usbhid_resumer
>   0     21         21       rpciod/0
> 
> After patch:
> # cat [debugfs]/tracing/trace_stat/workqueues
> # CPU  INSERTED  EXECUTED   TASKNAME:PID
> # |      |         |        `-WORKFUNC
> # |      |         |          |
>   0    273        273       events/0:5
>   0      1          1        |-reg_todo+0x0/0x648
>   0      4          4        |-check_corruption+0x0/0x2c
>   0      1          1        |-power_supply_changed_work+0x0/0x3c
>   0      6          6        |-console_callback+0x0/0x145
>   0    229        229        |-vmstat_update+0x0/0x33
>   0     12         12        |-do_cache_clean+0x0/0x6a
>   0      1          1        |-wq_barrier_func+0x0/0x12
>   0     15         15        |-mce_work_fn+0x0/0x31
>   0      3          3        |-rt_worker_func+0x0/0x2de
>   0      1          1        `-grace_ender+0x0/0x14
>   0      0          0       cpuset:6
>   0    659        659       khelper:7
>   0    659        659        `-__call_usermodehelper+0x0/0xa0
>   0      0          0       netns:11
>   0   1818       1818       kblockd/0:13
>   0   1115       1115        |-blk_unplug_work+0x0/0x1e
>   0    703        703        `-cfq_kick_queue+0x0/0x2f
>   0      0          0       kacpid:412
>   0      0          0       kacpi_notify:415
>   0      0          0       ata/0:463
>   0      0          0       ata_aux:509
>   0      0          0       ksuspend_usbd:515
>   0      0          0       aio/0:717
>   0      0          0       nfsiod:764
>   0      0          0       crypto/0:785
>   0      0          0       kpsmoused:1141
>   0      0          0       kstriped:1248
>   0      0          0       kondemand/0:1259
>   0      0          0       usbhid_resumer:1264
>   0     20         20       rpciod/0:1333
>   0      1          1        |-xs_tcp_connect_worker4+0x0/0x1b
>   0     19         19        `-xs_udp_connect_worker4+0x0/0x130



Oh, Nice!
This is an interesting view of worklets percentage/frequency of execution.
We could also have a file per workqueue, but anyway it seems to me a very good
first step!


> 
> After this step, we'll begin to add worklet time mesurement support to
> workqueuetrace.


Great!
Thanks.


> Thanks
> Zhaolei
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe()
  2009-04-20  6:58                         ` [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe() Zhaolei
@ 2009-04-20  7:26                           ` Frederic Weisbecker
  0 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-20  7:26 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

On Mon, Apr 20, 2009 at 02:58:26PM +0800, Zhaolei wrote:
> No need to use list_for_each_entry_safe() in iteration without delete node,
> we use list_for_each_entry() instead.
> 
> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> ---
>  kernel/trace/trace_workqueue.c |   10 ++++------
>  1 files changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
> index 984b917..934b27c 100644
> --- a/kernel/trace/trace_workqueue.c
> +++ b/kernel/trace/trace_workqueue.c
> @@ -47,12 +47,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>  			  struct work_struct *work)
>  {
>  	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> -	struct cpu_workqueue_stats *node, *next;
> +	struct cpu_workqueue_stats *node;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> -	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> -							list) {
> +	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
>  		if (node->pid == wq_thread->pid) {
>  			atomic_inc(&node->inserted);
>  			goto found;
> @@ -69,12 +68,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>  			  struct work_struct *work)
>  {
>  	int cpu = cpumask_first(&wq_thread->cpus_allowed);
> -	struct cpu_workqueue_stats *node, *next;
> +	struct cpu_workqueue_stats *node;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> -	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> -							list) {
> +	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
>  		if (node->pid == wq_thread->pid) {
>  			node->executed++;
>  			goto found;


Thanks, looks good.

Acked-by: Frederic Weisbecker <fweisbec@gmail.com> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20  1:49                                   ` KOSAKI Motohiro
@ 2009-04-20  8:46                                     ` Ingo Molnar
  2009-04-20 22:25                                       ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-20  8:46 UTC (permalink / raw)
  To: KOSAKI Motohiro, Andrew Morton, Oleg Nesterov
  Cc: Zhaolei, Frederic Weisbecker, Steven Rostedt, Tom Zanussi, linux-kernel


* KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> OK.
> 
> thus, I recommend pending 2 and 3, and changing 1 as Ingo 
> suggested. if not, reviewers might forget this thread.

Ok - but even the original order of patches would be fine - as long 
as the end result is agreed on and is reached.

I've asked Frederic (the original author of the workqueue tracer 
plugin) to collect these patches into a topic branch in his tree. 
Once it's a complete story we can pull it into tip:tracing/core.

Oleg seemed not to disagree and Andrew is in hiding.

Andrew, Oleg: if you plan to make unhappy noises about this level of 
instrumentation in the workqueue code, please do it sooner rather 
than later, as there's quite some effort injected into this already. 
A tentative non-NAK now (patches are still being sorted out) and an 
Ack on the final topic tree from you (once we send it and if it's 
good) and general happiness would be the ideal outcome :)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 4/4] trace_workqueue: Add worklet information
  2009-04-20  7:09                         ` [PATCH 4/4] trace_workqueue: Add worklet information Zhaolei
@ 2009-04-20 11:36                           ` Frederic Weisbecker
  2009-04-21  1:57                             ` Zhaolei
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-20 11:36 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

On Mon, Apr 20, 2009 at 03:09:13PM +0800, Zhaolei wrote:
> Now workqueue tracer can display per-worklet information as following:
>  # cat [debugfs]/tracing/trace_stat/workqueues
>  # CPU  INSERTED  EXECUTED   TASKNAME:PID
>  # |      |         |        `-WORKFUNC
>  # |      |         |          |
>  CPU  INSERTED  EXECUTED   TASKNAME:PID
>    0    157        157       events/0:5
>    0      1          1        |-reg_todo+0x0/0x648
>    0      3          3        |-check_corruption+0x0/0x2c
>    0      1          1        |-power_supply_changed_work+0x0/0x3c
>    0      2          2        |-console_callback+0x0/0x145
>    0      9          9        |-do_cache_clean+0x0/0x6a
>    0      1          1        |-wq_barrier_func+0x0/0x12
>    0    129        129        |-vmstat_update+0x0/0x33
>    0      8          8        |-mce_work_fn+0x0/0x31
>    0      1          1        |-grace_ender+0x0/0x14
>    0      2          2        `-rt_worker_func+0x0/0x2de
>    0      0          0       cpuset:6
>    0    656        656       khelper:7
>    0    656        656        `-__call_usermodehelper+0x0/0xa0
>    0      0          0       netns:11
>    0    592        592       kblockd/0:13
>    0     39         39        |-blk_unplug_work+0x0/0x1e
>    0    553        553        `-cfq_kick_queue+0x0/0x2f
>    0      0          0       kacpid:412
>    0      0          0       kacpi_notify:415
>    0      0          0       ata/0:463
>    0      0          0       ata_aux:509
>    0      0          0       ksuspend_usbd:515
>    0      0          0       aio/0:717
>    0      0          0       nfsiod:718
>    0      0          0       crypto/0:719
>    0      0          0       kpsmoused:1141
>    0      0          0       kstriped:1248
>    0      0          0       kondemand/0:1259
>    0      0          0       usbhid_resumer:1260
>    0     21         21       rpciod/0:1333
>    0      1          1        |-xs_tcp_connect_worker4+0x0/0x1b
>    0     19         19        |-xs_udp_connect_worker4+0x0/0x130
>    0      1          1        `-xprt_autoclose+0x0/0x2c
> 
> Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
> ---
>  kernel/trace/trace_workqueue.c |  219 ++++++++++++++++++++++++++++++++--------
>  1 files changed, 175 insertions(+), 44 deletions(-)
> 
> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
> index 3393c95..c465dbc 100644
> --- a/kernel/trace/trace_workqueue.c
> +++ b/kernel/trace/trace_workqueue.c
> @@ -12,19 +12,33 @@
>  #include "trace_stat.h"
>  #include "trace.h"
>  
> +/* A callback function saved in struct work_struct->func */
> +struct cpu_workqueue_stats;
> +struct workfunc_stats {
> +	struct list_head		list;
> +	struct cpu_workqueue_stats	*parent;
> +
> +	/* Addr of worklet's callback function */
> +	work_func_t			func;
> +	/* Can be inserted from interrupt or user context, need to be atomic */
> +	atomic_t			inserted;
> +	/*
> +	 * Don't need to be atomic, works are serialized in a single workqueue
> +	 * thread on a single CPU.
> +	 */
> +	unsigned int			executed;
> +};
>  
>  /* A cpu workqueue thread */
>  struct cpu_workqueue_stats {
>  	struct list_head            list;
>  	int		            cpu;
>  	pid_t			    pid;
> -/* Can be inserted from interrupt or user context, need to be atomic */
> +	/* Refer to comments of struct workfunc_stats for detail */
>  	atomic_t	            inserted;
> -/*
> - *  Don't need to be atomic, works are serialized in a single workqueue thread
> - *  on a single CPU.
> - */
>  	unsigned int		    executed;
> +	/* list of struct workfunc_stats in this workqueue */
> +	struct list_head            workfunclist;
>  };
>  
>  /* List of workqueue threads on one cpu */
> @@ -39,6 +53,38 @@ struct workqueue_global_stats {
>  static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
>  #define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
>  
> +/*
> + * Update record when insert a work into workqueue
> + * Caller need to hold cpu_workqueue_stats spin_lock
> + */
> +int do_workqueue_insertion(struct cpu_workqueue_stats *cws,
> +			   struct work_struct *work)
> +{
> +	struct workfunc_stats *wfstat;
> +
> +	atomic_inc(&cws->inserted);
> +
> +	list_for_each_entry(wfstat, &cws->workfunclist, list)
> +		if (wfstat->func == work->func)
> +			goto found;
> +
> +	/* not found, first insert */
> +
> +	/* work can be inserted from interrupt */
> +	wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
> +	if (!wfstat)
> +		return -ENOMEM;
> +
> +	wfstat->parent = cws;
> +	wfstat->func = work->func;
> +
> +	list_add_tail(&wfstat->list, &cws->workfunclist);
> +found:
> +	atomic_inc(&wfstat->inserted);
> +
> +	return 0;
> +}
> +
>  /* Insertion of a work */
>  static void
>  probe_workqueue_insertion(struct task_struct *wq_thread,
> @@ -51,7 +97,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
>  		if (node->pid == wq_thread->pid) {
> -			atomic_inc(&node->inserted);
> +			/* we ignore error of do_workqueue_insertion */
> +			do_workqueue_insertion(node, work);
>  			goto found;
>  		}
>  	}
> @@ -67,17 +114,30 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>  {
>  	int cpu = cpumask_first(&wq_thread->cpus_allowed);
>  	struct cpu_workqueue_stats *node;
> +	struct workfunc_stats *wfnode;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> -	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
> +
> +	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list)
>  		if (node->pid == wq_thread->pid) {
>  			node->executed++;
> -			goto found;
> +			goto found_wq;
>  		}
> -	}
> -	pr_debug("trace_workqueue: entry not found\n");
> -found:
> +	pr_debug("trace_workqueue: workqueue not found\n");
> +	goto end;
> +
> +found_wq:
> +	list_for_each_entry(wfnode, &node->workfunclist, list)
> +		if (wfnode->func == work->func) {
> +			wfnode->executed++;
> +			goto found_wf;
> +		}
> +	pr_debug("trace_workqueue: worklet not found\n");
> +	goto end;
> +
> +found_wf:
> +end:
>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>  
> @@ -85,6 +145,7 @@ found:
>  static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
>  {
>  	struct cpu_workqueue_stats *cws;
> +	struct workfunc_stats *wfstat;
>  	unsigned long flags;
>  
>  	WARN_ON(cpu < 0);
> @@ -93,16 +154,37 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
>  	cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
>  	if (!cws) {
>  		pr_warning("trace_workqueue: not enough memory\n");
> -		return;
> +		goto err_alloc_cws;
>  	}
>  	INIT_LIST_HEAD(&cws->list);
>  	cws->cpu = cpu;
>  
>  	cws->pid = wq_thread->pid;
> +	INIT_LIST_HEAD(&cws->workfunclist);
> +
> +	/*
> +	 * add a dummy node to cpu_workqueue_stats->workfunclist to:
> +	 * 1: help print function to print header of work(ex, workqueue info)
> +	 * 2: used as a seq iter if no work exist in workqueue
> +	 */
> +	wfstat = kzalloc(sizeof(struct workfunc_stats), GFP_ATOMIC);
> +	if (!wfstat) {
> +		pr_warning("trace_workqueue: not enough memory\n");
> +		goto err_alloc_wfstat;
> +	}
> +	wfstat->parent = cws;
> +	list_add_tail(&wfstat->list, &cws->workfunclist);
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> +
> +	return;
> +
> +err_alloc_wfstat:
> +	kfree(cws);
> +err_alloc_cws:
> +	return;
>  }
>  
>  /* Destruction of a cpu workqueue thread */
> @@ -115,12 +197,22 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
> -							list) {
> -		if (node->pid == wq_thread->pid) {
> -			list_del(&node->list);
> -			kfree(node);
> -			goto found;
> +				 list) {
> +		struct workfunc_stats *wfstat, *wfstatnext;
> +
> +		if (node->pid != wq_thread->pid)
> +			continue;
> +
> +		list_for_each_entry_safe(wfstat, wfstatnext,
> +					 &node->workfunclist, list) {
> +			list_del(&wfstat->list);
> +			kfree(wfstat);
>  		}
> +
> +		list_del(&node->list);
> +		kfree(node);



Sidenote to me: I have to provide a way for a stat
tracer to wait for the end of a pending statistic
output session before the tracer comes to free any
of its exposed entries. Otherwise we could end up
with freed memory dereference.

May be waitqueue.



> +
> +		goto found;
>  	}
>  
>  	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
> @@ -129,17 +221,23 @@ found:
>  
>  }
>  
> -static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
> +static struct workfunc_stats *workqueue_stat_start_cpu(int cpu)
>  {
>  	unsigned long flags;
> -	struct cpu_workqueue_stats *ret = NULL;
> -
> +	struct workfunc_stats *ret = NULL;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  
> -	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
> -		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
> -				 struct cpu_workqueue_stats, list);
> +	if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
> +		struct cpu_workqueue_stats *cws;
> +		cws = list_entry(workqueue_cpu_stat(cpu)->list.next,
> +			struct cpu_workqueue_stats, list);
> +		/*
> +		 * cpu_workqueue_stats->workfunclist at least have a dummy node
> +		 */
> +		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
> +			list);
> +	}
>  
>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
> @@ -161,43 +259,75 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
>  
>  static void *workqueue_stat_next(void *prev, int idx)
>  {
> -	struct cpu_workqueue_stats *prev_cws = prev;
> -	int cpu = prev_cws->cpu;
> +	struct workfunc_stats *prev_wfstat = prev;
> +	int cpu = prev_wfstat->parent->cpu;
>  	unsigned long flags;
>  	void *ret = NULL;
>  
>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> -	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
> +
> +	if (!list_is_last(&prev_wfstat->list,
> +	    &prev_wfstat->parent->workfunclist)) {
> +		ret = list_entry(prev_wfstat->list.next, struct workfunc_stats,
> +			list);
>  		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> -		do {
> -			cpu = cpumask_next(cpu, cpu_possible_mask);
> -			if (cpu >= nr_cpu_ids)
> -				return NULL;
> -		} while (!(ret = workqueue_stat_start_cpu(cpu)));
>  		return ret;
>  	}
> +
> +	if (!list_is_last(&prev_wfstat->parent->list,
> +	    &workqueue_cpu_stat(cpu)->list)) {
> +		struct cpu_workqueue_stats *cws = list_entry(
> +			prev_wfstat->parent->list.next,
> +			struct cpu_workqueue_stats, list);
> +		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
> +			list);
> +		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
> +		return ret;
> +	}
> +
>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
> -	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
> -			  list);
> +	do {
> +		cpu = cpumask_next(cpu, cpu_possible_mask);
> +		if (cpu >= nr_cpu_ids)


The above test will take the total number of cpus as
the last cpu number. This assumption may be false
if the possible cpus are not contiguous.

Perhaps you'd better use:


for_each_possible_cpu(cpu) {
	ret = workqueue_stat_start_cpu(cpu);
	if (ret)
		break;
}

After a quick look in cpumask.h it seems it will start
from the cpu which follows the one you give in parameter.
So it should be fine.



> +			return NULL;
> +	} while (!(ret = workqueue_stat_start_cpu(cpu)));
> +
> +	return ret;
>  }
>  
>  static int workqueue_stat_show(struct seq_file *s, void *p)
>  {
> -	struct cpu_workqueue_stats *cws = p;
> +	struct workfunc_stats *wfstat = p;
> +	struct cpu_workqueue_stats *cws = wfstat->parent;
>  	struct pid *pid;
>  	struct task_struct *tsk;
>  
> -	pid = find_get_pid(cws->pid);
> -	if (pid) {
> -		tsk = get_pid_task(pid, PIDTYPE_PID);
> -		if (tsk) {
> -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
> -				   atomic_read(&cws->inserted), cws->executed,
> -				   tsk->comm);
> -			put_task_struct(tsk);
> +	if (!wfstat->func) {
> +		/* It is first dummy node, need to print workqueue info */
> +		pid = find_get_pid(cws->pid);
> +		if (pid) {
> +			tsk = get_pid_task(pid, PIDTYPE_PID);
> +			if (tsk) {
> +				seq_printf(s, "%3d %6d     %6u       %s:%d\n",
> +					   cws->cpu,
> +					   atomic_read(&cws->inserted),
> +					   cws->executed,
> +					   tsk->comm,
> +					   cws->pid);
> +				put_task_struct(tsk);
> +			}
> +			put_pid(pid);
>  		}
> -		put_pid(pid);
> +	} else {
> +		/* It is effect node, need to print workfunc info */
> +		int lastwf = list_is_last(&wfstat->list, &cws->workfunclist);
> +		seq_printf(s, "%3d %6d     %6u        %c-%pF\n",
> +			cws->cpu,
> +			atomic_read(&wfstat->inserted),
> +			wfstat->executed,
> +			lastwf ? '`' : '|',
> +			wfstat->func);
>  	}
>  
>  	return 0;
> @@ -205,7 +335,8 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
>  
>  static int workqueue_stat_headers(struct seq_file *s)
>  {
> -	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
> +	seq_printf(s, "# CPU  INSERTED  EXECUTED   TASKNAME:PID\n");
> +	seq_printf(s, "# |      |         |        `-WORKFUNC\n");
>  	seq_printf(s, "# |      |         |          |\n");
>  	return 0;
>  }
> -- 



Looks very nice, like the rest of the series.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-17  7:15                         ` [PATCH v3 1/1] " Zhaolei
  2009-04-17 13:45                           ` Ingo Molnar
@ 2009-04-20 17:11                           ` Frederic Weisbecker
  2009-04-21  1:20                             ` KOSAKI Motohiro
  1 sibling, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-20 17:11 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

On Fri, Apr 17, 2009 at 03:15:51PM +0800, Zhaolei wrote:
> v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format
>     introduced by Steven Rostedt: consolidate trace and trace_event headers
> v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap
>     the work addr



Kosaki-san, may I add your Signed-off-by on this one?
Since you participated in this patch.

Thanks.

Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20  8:46                                     ` Ingo Molnar
@ 2009-04-20 22:25                                       ` Oleg Nesterov
  2009-04-20 23:48                                         ` Frederic Weisbecker
  0 siblings, 1 reply; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-20 22:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Andrew Morton, Zhaolei, Frederic Weisbecker,
	Steven Rostedt, Tom Zanussi, linux-kernel

On 04/20, Ingo Molnar wrote:
>
> Andrew, Oleg: if you plan to make unhappy noises about this level of
> instrumentation in the workqueue code, please do it sooner rather
> than later, as there's quite some effort injected into this already.
> A tentative non-NAK now (patches are still being sorted out) and an
> Ack on the final topic tree from you (once we send it and if it's
> good) and general happiness would be the ideal outcome :)

Imho, this info is useful, and the changes are fine.

But I didn't study kernel/trace/trace_workqueue.c yet... Sorry, I was
going to do this, but I didn't.

At first glance, with or without these new changes some parts of
trace_workqueue.c looks suspicious.

For example, I don't understand cpu_workqueue_stats->pid. Why it is
needed? Why can't we just record wq_thread itself? And if we copy
wq_thread->comm to cpu_workqueue_stats, we don't even need get/put
task_struct, afaics.

probe_workqueue_destruction() does cpumask_first(cwq->cpus_allowed),
this doesn't look right. When workqueue_cpu_callback() calls
cleanup_workqueue_thread(), wq_thread is no longer affine to CPU it
was created on. This means probe_workqueue_destruction() can't always
find the correct cpu_workqueue_stats in workqueue_cpu_stat(cpu), no?

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20 22:25                                       ` Oleg Nesterov
@ 2009-04-20 23:48                                         ` Frederic Weisbecker
  2009-04-21 15:28                                           ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-20 23:48 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On Tue, Apr 21, 2009 at 12:25:14AM +0200, Oleg Nesterov wrote:
> On 04/20, Ingo Molnar wrote:
> >
> > Andrew, Oleg: if you plan to make unhappy noises about this level of
> > instrumentation in the workqueue code, please do it sooner rather
> > than later, as there's quite some effort injected into this already.
> > A tentative non-NAK now (patches are still being sorted out) and an
> > Ack on the final topic tree from you (once we send it and if it's
> > good) and general happiness would be the ideal outcome :)
> 
> Imho, this info is useful, and the changes are fine.
> 
> But I didn't study kernel/trace/trace_workqueue.c yet... Sorry, I was
> going to do this, but I didn't.
> 
> At first glance, with or without these new changes some parts of
> trace_workqueue.c looks suspicious.
> 
> For example, I don't understand cpu_workqueue_stats->pid. Why it is
> needed? Why can't we just record wq_thread itself? And if we copy
> wq_thread->comm to cpu_workqueue_stats, we don't even need get/put
> task_struct, afaics.


We record the pid because of a race condition that can happen
against the stat tracing framework.

For example,

- the user opens the stat file
- then the entries are loaded and sorted into memory
- one of the workqueues is destroyed
- the user reads the file. We try to get the task struct
  of each workqueues that were recorded using their pid.
  If the task is destroyed then this is fine, we don't get
  any struct. If we were using the task_struct as an identifier,
>   we would have dereferenced a freed pointer.

 
> probe_workqueue_destruction() does cpumask_first(cwq->cpus_allowed),
> this doesn't look right. When workqueue_cpu_callback() calls
> cleanup_workqueue_thread(), wq_thread is no longer affine to CPU it
> was created on. This means probe_workqueue_destruction() can't always
> find the correct cpu_workqueue_stats in workqueue_cpu_stat(cpu), no?


Ah, at this stage of cpu_down() the cpu is cleared on
tsk->cpu_allowed ?

Thanks for looking at this.

Frederic.


> Oleg.
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro
  2009-04-20 17:11                           ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
@ 2009-04-21  1:20                             ` KOSAKI Motohiro
  0 siblings, 0 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-21  1:20 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: kosaki.motohiro, Zhaolei, Steven Rostedt, Tom Zanussi,
	Ingo Molnar, linux-kernel, Oleg Nesterov, Andrew Morton

> On Fri, Apr 17, 2009 at 03:15:51PM +0800, Zhaolei wrote:
> > v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format
> >     introduced by Steven Rostedt: consolidate trace and trace_event headers
> > v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap
> >     the work addr
> 
> Kosaki-san, may I add you Signed-off-by on this one?
> Since you participated in this patch.
> 
> Thanks.

Sure! thanks.





^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: Re: [PATCH 4/4] trace_workqueue: Add worklet information
  2009-04-20 11:36                           ` Frederic Weisbecker
@ 2009-04-21  1:57                             ` Zhaolei
  2009-04-21 23:39                               ` Frederic Weisbecker
  0 siblings, 1 reply; 94+ messages in thread
From: Zhaolei @ 2009-04-21  1:57 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

Frederic Weisbecker wrote:
...
>>  /* Destruction of a cpu workqueue thread */
>> @@ -115,12 +197,22 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>>  
>>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>>  	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>> -							list) {
>> -		if (node->pid == wq_thread->pid) {
>> -			list_del(&node->list);
>> -			kfree(node);
>> -			goto found;
>> +				 list) {
>> +		struct workfunc_stats *wfstat, *wfstatnext;
>> +
>> +		if (node->pid != wq_thread->pid)
>> +			continue;
>> +
>> +		list_for_each_entry_safe(wfstat, wfstatnext,
>> +					 &node->workfunclist, list) {
>> +			list_del(&wfstat->list);
>> +			kfree(wfstat);
>>  		}
>> +
>> +		list_del(&node->list);
>> +		kfree(node);
> 
> 
> 
> Sidenote to me: I have to provide a way for a stat
> tracer to wait for the end of a pending statistic
> output session before the tracer comes to free any
> of its exposed entries. Otherwise we could end up
> with freed memory dereference.
> 
> May be waitqueue.
> 
> 
> 
>> +
>> +		goto found;
>>  	}
>>  
>>  	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
>> @@ -129,17 +221,23 @@ found:
>>  
>>  }
>>  
>> -static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
>> +static struct workfunc_stats *workqueue_stat_start_cpu(int cpu)
>>  {
>>  	unsigned long flags;
>> -	struct cpu_workqueue_stats *ret = NULL;
>> -
>> +	struct workfunc_stats *ret = NULL;
>>  
>>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>>  
>> -	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
>> -		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
>> -				 struct cpu_workqueue_stats, list);
>> +	if (!list_empty(&workqueue_cpu_stat(cpu)->list)) {
>> +		struct cpu_workqueue_stats *cws;
>> +		cws = list_entry(workqueue_cpu_stat(cpu)->list.next,
>> +			struct cpu_workqueue_stats, list);
>> +		/*
>> +		 * cpu_workqueue_stats->workfunclist at least have a dummy node
>> +		 */
>> +		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
>> +			list);
>> +	}
>>  
>>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>>  
>> @@ -161,43 +259,75 @@ static void *workqueue_stat_start(struct tracer_stat *trace)
>>  
>>  static void *workqueue_stat_next(void *prev, int idx)
>>  {
>> -	struct cpu_workqueue_stats *prev_cws = prev;
>> -	int cpu = prev_cws->cpu;
>> +	struct workfunc_stats *prev_wfstat = prev;
>> +	int cpu = prev_wfstat->parent->cpu;
>>  	unsigned long flags;
>>  	void *ret = NULL;
>>  
>>  	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>> -	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
>> +
>> +	if (!list_is_last(&prev_wfstat->list,
>> +	    &prev_wfstat->parent->workfunclist)) {
>> +		ret = list_entry(prev_wfstat->list.next, struct workfunc_stats,
>> +			list);
>>  		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>> -		do {
>> -			cpu = cpumask_next(cpu, cpu_possible_mask);
>> -			if (cpu >= nr_cpu_ids)
>> -				return NULL;
>> -		} while (!(ret = workqueue_stat_start_cpu(cpu)));
>>  		return ret;
>>  	}
>> +
>> +	if (!list_is_last(&prev_wfstat->parent->list,
>> +	    &workqueue_cpu_stat(cpu)->list)) {
>> +		struct cpu_workqueue_stats *cws = list_entry(
>> +			prev_wfstat->parent->list.next,
>> +			struct cpu_workqueue_stats, list);
>> +		ret = list_entry(cws->workfunclist.next, struct workfunc_stats,
>> +			list);
>> +		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>> +		return ret;
>> +	}
>> +
>>  	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>>  
>> -	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
>> -			  list);
>> +	do {
>> +		cpu = cpumask_next(cpu, cpu_possible_mask);
>> +		if (cpu >= nr_cpu_ids)
> 
Hello, Frederic

Thanks for your review and advice.

> 
> The above test will take the total number of cpus as
> the last cpu number. This assumption may be false
> if the possible cpus are not contiguous.
> 
> Perhaps you'd better use:
> 
> 
> for_each_possible_cpu(cpu) {
> 	ret = workqueue_stat_start_cpu(cpu);
> 	if (ret)
> 		break;
> }
IMHO, this code is to choose first cpu who have workqueue.
But we need to choose next cpu, may be we need to:

int nextcpu;
for_each_possible_cpu(nextcpu) {
	/* bypass prev cpus */
	if (nextcpu <= cpu)
		continue;

	ret = workqueue_stat_start_cpu(nextcpu);
	if (ret) {
		break;
	}
}

By looking at cpumask.h, for_each_possible_cpu(nextcpu) always makes nextcpu
increase, but if not (in the future?), the above code would be wrong.

Thanks
Zhaolei


> 
> After a quick look in cpumask.h it seems it will start
> from the cpu which follows the one you give in parameter.
> So it should be fine.
> 
> 
> 
>> +			return NULL;
>> +	} while (!(ret = workqueue_stat_start_cpu(cpu)));
>> +
>> +	return ret;
>>  }
>>  
>>  static int workqueue_stat_show(struct seq_file *s, void *p)
>>  {
>> -	struct cpu_workqueue_stats *cws = p;
>> +	struct workfunc_stats *wfstat = p;
>> +	struct cpu_workqueue_stats *cws = wfstat->parent;
>>  	struct pid *pid;
>>  	struct task_struct *tsk;
>>  
>> -	pid = find_get_pid(cws->pid);
>> -	if (pid) {
>> -		tsk = get_pid_task(pid, PIDTYPE_PID);
>> -		if (tsk) {
>> -			seq_printf(s, "%3d %6d     %6u       %s\n", cws->cpu,
>> -				   atomic_read(&cws->inserted), cws->executed,
>> -				   tsk->comm);
>> -			put_task_struct(tsk);
>> +	if (!wfstat->func) {
>> +		/* It is first dummy node, need to print workqueue info */
>> +		pid = find_get_pid(cws->pid);
>> +		if (pid) {
>> +			tsk = get_pid_task(pid, PIDTYPE_PID);
>> +			if (tsk) {
>> +				seq_printf(s, "%3d %6d     %6u       %s:%d\n",
>> +					   cws->cpu,
>> +					   atomic_read(&cws->inserted),
>> +					   cws->executed,
>> +					   tsk->comm,
>> +					   cws->pid);
>> +				put_task_struct(tsk);
>> +			}
>> +			put_pid(pid);
>>  		}
>> -		put_pid(pid);
>> +	} else {
>> +		/* It is effect node, need to print workfunc info */
>> +		int lastwf = list_is_last(&wfstat->list, &cws->workfunclist);
>> +		seq_printf(s, "%3d %6d     %6u        %c-%pF\n",
>> +			cws->cpu,
>> +			atomic_read(&wfstat->inserted),
>> +			wfstat->executed,
>> +			lastwf ? '`' : '|',
>> +			wfstat->func);
>>  	}
>>  
>>  	return 0;
>> @@ -205,7 +335,8 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
>>  
>>  static int workqueue_stat_headers(struct seq_file *s)
>>  {
>> -	seq_printf(s, "# CPU  INSERTED  EXECUTED   NAME\n");
>> +	seq_printf(s, "# CPU  INSERTED  EXECUTED   TASKNAME:PID\n");
>> +	seq_printf(s, "# |      |         |        `-WORKFUNC\n");
>>  	seq_printf(s, "# |      |         |          |\n");
>>  	return 0;
>>  }
>> -- 
> 
> 
> 
> Looks very nice, like the rest of the series.
> 
> 



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-20 23:48                                         ` Frederic Weisbecker
@ 2009-04-21 15:28                                           ` Oleg Nesterov
  2009-04-21 15:50                                             ` Oleg Nesterov
  2009-04-21 18:28                                             ` Frederic Weisbecker
  0 siblings, 2 replies; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-21 15:28 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On 04/21, Frederic Weisbecker wrote:
>
> On Tue, Apr 21, 2009 at 12:25:14AM +0200, Oleg Nesterov wrote:
> >
> > For example, I don't understand cpu_workqueue_stats->pid. Why it is
> > needed? Why can't we just record wq_thread itself? And if we copy
> > wq_thread->comm to cpu_workqueue_stats, we don't even need get/put
> > task_struct, afaics.
>
>
> We record the pid because of a race condition that can happen
> against the stat tracing framework.
>
> For example,
>
> - the user opens the stat file
> - then the entries are loaded and sorted into memory
> - one of the workqueues is destroyed

so, its pid is freed and can be re-used,

> - the user reads the file. We try to get the task struct
>   of each workqueues that were recorded using their pid.

if it is already re-used when workqueue_stat_show() is called, we
can get a wrong tsk.

Another problem with pid_t, it needs a costly find_get_pid(). And please
note that find_get_pid() uses current->nsproxy->pid_ns, this means we
can get a wrong tsk or NULL if we read debugfs from sub-namespace, but
this is minor.

Using "struct pid* pid" is better than "pid_t pid" and solves these
problems. But I still think we don't need it at all.

>   If the task is destroyed then this is fine, we don't get
>   any struct. If we were using the task_struct as an identifier,
>   me would have derefenced a freed pointer.

Please note I said "get/put task_struct" above. probe_workqueue_creation()
does get_task_struct(), and probe_workqueue_destruction() put_task_struct().

But, unless I missed something we don't need even this. Why do we
need to dereference this task_struct? The only reason afaics is
that we read tsk->comm. But we can copy wq_thread->comm to
cpu_workqueue_stats when probe_workqueue_creation() is called . In that
case we don't need get/put, we only use cws->task (or whatever) as a
"void *", just to check "if (node->task  == wq_thread)" in
probe_workqueue_insertion/etc.

But it is very possible I missed something, please correct me.

> > probe_workqueue_destruction() does cpumask_first(cwq->cpus_allowed),
> > this doesn't look right. When workqueue_cpu_callback() calls
> > cleanup_workqueue_thread(), wq_thread is no longer affine to CPU it
> > was created on. This means probe_workqueue_destruction() can't always
> > find the correct cpu_workqueue_stats in workqueue_cpu_stat(cpu), no?
>
>
> Ah, at this stage of cpu_down() the cpu is cleared on
> tsk->cpu_allowed ?

Yes. Well not exactly...

More precisely, at this stage tsk->cpu_allowed is not bound, it has
"all CPUS", == cpu_possible_mask. This means that cpumask_first()
returns the "random" value. The fix is simple, I think you should
just add "int cpu" argument to cleanup_workqueue_thread().


Hmm... why cpu_workqueue_stats->inserted is atomic_t? We increment it
in do_workqueue_insertion() (should be static, no?), and it is always
called with cpu_workqueue_struct->lock held.

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-21 15:28                                           ` Oleg Nesterov
@ 2009-04-21 15:50                                             ` Oleg Nesterov
  2009-04-21 18:33                                               ` Frederic Weisbecker
  2009-04-21 18:28                                             ` Frederic Weisbecker
  1 sibling, 1 reply; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-21 15:50 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On 04/21, Oleg Nesterov wrote:
>
> On 04/21, Frederic Weisbecker wrote:
> >
> > For example,
> >
> > - the user opens the stat file
> > - then the entries are loaded and sorted into memory
> > - one of the workqueues is destroyed

Hmm. And what protects cws when, say, workqueue_stat_show() is called?
Can't we race with probe_workqueue_destruction()->kfree() ?

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-21 15:28                                           ` Oleg Nesterov
  2009-04-21 15:50                                             ` Oleg Nesterov
@ 2009-04-21 18:28                                             ` Frederic Weisbecker
  2009-04-21 19:37                                               ` Oleg Nesterov
  1 sibling, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-21 18:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On Tue, Apr 21, 2009 at 05:28:43PM +0200, Oleg Nesterov wrote:
> On 04/21, Frederic Weisbecker wrote:
> >
> > On Tue, Apr 21, 2009 at 12:25:14AM +0200, Oleg Nesterov wrote:
> > >
> > > For example, I don't understand cpu_workqueue_stats->pid. Why it is
> > > needed? Why can't we just record wq_thread itself? And if we copy
> > > wq_thread->comm to cpu_workqueue_stats, we don't even need get/put
> > > task_struct, afaics.
> >
> >
> > We record the pid because of a race condition that can happen
> > against the stat tracing framework.
> >
> > For example,
> >
> > - the user opens the stat file
> > - then the entries are loaded and sorted into memory
> > - one of the workqueues is destroyed
> 
> so, its pid is freed and can be re-used,



Indeed.


 
> > - the user reads the file. We try to get the task struct
> >   of each workqueues that were recorded using their pid.
> 
> if it is already re-used when workqueue_stat_show() is called, we
> can get a wrong a wrong tsk.
> 
> Another problem with pid_t, it needs a costly find_get_pid(). And please
> note that find_get_pid() uses current->nsproxy->pid_ns, this means we
> can get a wrong tsk or NULL if we read debugfs from sub-namespace, but
> this is minor.
> 
> Using "struct pid* pid" is better than "pid_t pid" and solves these
> problems. But I still think we don't need it at all.



Ok, I think you're right. I will manage to remove the pid and
use the task_struct as the identifier.


 
> >   If the task is destroyed then this is fine, we don't get
> >   any struct. If we were using the task_struct as an identifier,
> >   me would have derefenced a freed pointer.
> 
> Please note I said "get/put task_struct" above. probe_workqueue_creation()
> does get_task_struct(), and probe_workqueue_destruction() put_task_struct().
> 
> But, unless I missed something we don't need even this. Why do we
> need to dereference this task_struct? The only reason afaics is
> that we read tsk->comm. But we can copy wq_thread->comm to
> cpu_workqueue_stats when probe_workqueue_creation() is called . In that
> case we don't need get/put, we only use cws->task (or whatever) as a
> "void *", just to check "if (node->task  == wq_thread)" in
> probe_workqueue_insertion/etc.
> 
> But it is very possible I missed something, please correct me.



No it's a very good idea!
Such a way to proceed will solve a lot of the racy issues that you are
reporting.


> > > probe_workqueue_destruction() does cpumask_first(cwq->cpus_allowed),
> > > this doesn't look right. When workqueue_cpu_callback() calls
> > > cleanup_workqueue_thread(), wq_thread is no longer affine to CPU it
> > > was created on. This means probe_workqueue_destruction() can't always
> > > find the correct cpu_workqueue_stats in workqueue_cpu_stat(cpu), no?
> >
> >
> > Ah, at this stage of cpu_down() the cpu is cleared on
> > tsk->cpu_allowed ?
> 
> Yes. Well not exactly...
> 
> More precisely, at this stage tsk->cpu_allowed is not bound, it has
> "all CPUS", == cpu_possible_mask. This means that cpumask_first()
> returns the "random" value. The fix is simple, I think you should
> just add "int cpu" argument to cleanup_workqueue_thread().
> 



Ok.
But where is it done? I mean, by looking at workqueue_cpu_callback()
in workqueue.c, I only see this part related to _cpu_down():

case CPU_POST_DEAD:
	cleanup_workqueue_thread(cwq);
	break;



> Hmm... why cpu_workqueue_stats->inserted is atomic_t? We increment it
> in do_workqueue_insertion() (should be static, no?), and it is always
> called with cpu_workqueue_struct->lock held.


Ah indeed. Good catch.

Your comments are very useful. I will fix all that on top of Zhaolei
patches.

Thanks a lot Oleg!

Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-21 15:50                                             ` Oleg Nesterov
@ 2009-04-21 18:33                                               ` Frederic Weisbecker
  0 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-21 18:33 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On Tue, Apr 21, 2009 at 05:50:32PM +0200, Oleg Nesterov wrote:
> On 04/21, Oleg Nesterov wrote:
> >
> > On 04/21, Frederic Weisbecker wrote:
> > >
> > > For example,
> > >
> > > - the user opens the stat file
> > > - then the entries are loaded and sorted into memory
> > > - one of the workqueues is destroyed
> 
> Hmm. And what protects cws when, say, workqueue_stat_show() is called?
> Can't we race with probe_workqueue_destruction()->kfree() ?
> 
> Oleg.
>


Indeed. I plan to implement a waitqueue on the stat tracing framework
to fix it (per tracer of course).

Thanks. 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints use TRACE_EVENT macro
  2009-04-21 18:28                                             ` Frederic Weisbecker
@ 2009-04-21 19:37                                               ` Oleg Nesterov
  0 siblings, 0 replies; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-21 19:37 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, KOSAKI Motohiro, Andrew Morton, Zhaolei,
	Steven Rostedt, Tom Zanussi, linux-kernel

On 04/21, Frederic Weisbecker wrote:
>
> On Tue, Apr 21, 2009 at 05:28:43PM +0200, Oleg Nesterov wrote:
> > More precisely, at this stage tsk->cpu_allowed is not bound, it has
> > "all CPUS", == cpu_possible_mask. This means that cpumask_first()
> > returns the "random" value. The fix is simple, I think you should
> > just add "int cpu" argument to cleanup_workqueue_thread().
>
> Ok.
> But where is it done? I mean, by looking at workqueue_cpu_callback()
> in workqueue.c, I only see this part related to _cpu_down():
>
> case CPU_POST_DEAD:
> 	cleanup_workqueue_thread(cwq);
> 	break;

There are two cases. Please note that create_workqueue_thread() does not
bind the thread, we delay kthread_bind() until start_workqueue_thread().
So, if we have CPU_UP_CANCELED (after CPU_UP_PREPARE) we call
cleanup_workqueue_thread() but the thread still has ->cpu_allowed ==
cpu_all_mask.

Another case is a normal cpu_down(). When CPU_POST_DEAD happens, this
CPU is really dead. All tasks were already moved from the dead cpu.
And since wq_thread was affine to that CPU, it gets the "full" cpu mask.

OK, I don't think it is possible to parse the text above ;) Please
look at  /* No more Mr. Nice Guy. */ in move_task_off_dead_cpu().

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: Re: [PATCH 4/4] trace_workqueue: Add worklet information
  2009-04-21  1:57                             ` Zhaolei
@ 2009-04-21 23:39                               ` Frederic Weisbecker
  0 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-21 23:39 UTC (permalink / raw)
  To: Zhaolei
  Cc: KOSAKI Motohiro, Steven Rostedt, Tom Zanussi, Ingo Molnar,
	linux-kernel, Oleg Nesterov, Andrew Morton

On Tue, Apr 21, 2009 at 09:57:02AM +0800, Zhaolei wrote:
> Hello, Frederic
> 
> Thanks for your review and advice.
> 
> > 
> > The above test will take the total number of cpus as
> > the last cpu number. This assumption may be false
> > if the possible cpus are not contiguous.
> > 
> > Perhaps you'd better use:
> > 
> > 
> > for_each_possible_cpu(cpu) {
> > 	ret = workqueue_stat_start_cpu(cpu);
> > 	if (ret)
> > 		break;
> > }
> IMHO, this code is to choose first cpu who have workqueue.
> But we need to choose next cpu, may be we need to:
> 
> int nextcpu;
> for_each_possible_cpu(nextcpu) {
> 	/* bypass prev cpus */
> 	if (nextcpu <= cpu)
> 		continue;
> 
> 	ret = workqueue_stat_start_cpu(cpu);
> 	if (ret) {
> 		break;
> 	}
> }
> 
> By looking cpumask.h, for_each_possible_cpu(nextcpu) always make nextcpu
> increase, but if not(in future?), above code is wrong.
> 
> Thanks
> Zhaolei
> 


Ok.

BTW, I've applied your patches in
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git tracing/workqueue

Once I have some time, I will also address Oleg's comments.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-17 13:45                           ` Ingo Molnar
  2009-04-20  1:30                             ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints " Zhaolei
@ 2009-04-24 11:42                             ` Zhaolei
  2009-04-24 11:43                               ` [PATCH 1/4] workqueue_tracepoint: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry Zhaolei
                                                 ` (4 more replies)
  1 sibling, 5 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-24 11:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton

Ingo Molnar wrote:
> ...
> Basically, i'd suggest the following complete set of events instead:
> 
>  TRACE_EVENT(workqueue_create
>  TRACE_EVENT(workqueue_flush              /* NEW */
>  TRACE_EVENT(workqueue_destroy
> 
>  TRACE_EVENT(worklet_enqueue              /* NEW */
>  TRACE_EVENT(worklet_enqueue_delayed      /* NEW */
> 
>  TRACE_EVENT(worklet_execute              /* instead of workqueue_execution */
>  TRACE_EVENT(worklet_complete             /* NEW */
> 
>  TRACE_EVENT(worklet_cancel               /* NEW */
> 
> This allows the understanding of the life cycle of a workqueue and 
> of worklets that enter that workqueue. Note the distinction between 
> workqueue and worklet (work) - that is essential.
> 
> The parameters of the events are obvious, with one detail: i'd 
> suggest a 'cpu' parameter to the enqueue events, to allow the 
> mapping of the _on(..cpu) variants too.
> 
> I would not bother with schedule_on_each_cpu() instrumentation - 
> it's rarely used.
> 
> 	Ingo
> 
Hello, Ingo

These patches add tracepoints for per-worklet tracing.
Now we have enough tracepoints to start making trace_workqueue.c support
worklet time measurement.

Thanks
Zhaolei


^ permalink raw reply	[flat|nested] 94+ messages in thread

* [PATCH 1/4] workqueue_tracepoint: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
@ 2009-04-24 11:43                               ` Zhaolei
  2009-04-24 11:45                               ` [PATCH 2/4] workqueue_tracepoint: Add workqueue_flush and worklet_cancel tracepoint Zhaolei
                                                 ` (3 subsequent siblings)
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-24 11:43 UTC (permalink / raw)
  To: Zhaolei
  Cc: Ingo Molnar, KOSAKI Motohiro, Frederic Weisbecker,
	Steven Rostedt, Tom Zanussi, linux-kernel, Oleg Nesterov,
	Andrew Morton

From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

An entry/exit handler pair is a useful common tracepoint technique.
It can measure handler consumption time.

So, add a workqueue handler-exit tracepoint as well, and rename the
execution tracepoint to handler-entry.

Changelog:
v1: From KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
    introduce workqueue_handler_exit tracepoint
    rename workqueue_execution to workqueue_handler_entry
v2: From Zhao Lei <zhaolei@cn.fujitsu.com>
    Declare workqueue_handler_exit's work argument as void *, for we can't
    access its contents.
    Add workqueue_handler_entry's &work_struct back to TP_printk, to used as
    'key' for workqueue_handler_entry/exit pair.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki@jp.fujitsu.com>
---
 include/trace/events/workqueue.h |   36 ++++++++++++++++++++++++++++++------
 kernel/trace/trace_workqueue.c   |   10 +++++-----
 kernel/workqueue.c               |    3 ++-
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 035f1bf..36f4939 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -30,26 +30,50 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_execution,
+TRACE_EVENT(workqueue_handler_entry,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
 	TP_ARGS(wq_thread, work),
 
 	TP_STRUCT__entry(
-		__array(char,		thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,		thread_pid)
-		__field(work_func_t,	func)
+		__array(char,			thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(struct work_struct *,	work)
+		__field(work_func_t,		func)
 	),
 
 	TP_fast_assign(
 		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
 		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
 		__entry->func		= work->func;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_printk("thread=%s:%d work=%p func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->work, __entry->func)
+);
+
+/* Declare work as void *, because we can't use work->... in after f(work) */
+TRACE_EVENT(workqueue_handler_exit,
+	TP_PROTO(struct task_struct *wq_thread, void *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm, TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(void *,	work)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->work		= work;
+	),
+
+	TP_printk("thread=%s:%d work=%p", __entry->thread_comm,
+		  __entry->thread_pid, __entry->work)
 );
 
 /* Trace the creation of one workqueue thread on a cpu */
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 7c632b9..8d26db6 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -109,7 +109,7 @@ found:
 
 /* Execution of a work */
 static void
-probe_workqueue_execution(struct task_struct *wq_thread,
+probe_workqueue_entry(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -373,13 +373,13 @@ int __init trace_workqueue_early_init(void)
 	if (ret)
 		goto out;
 
-	ret = register_trace_workqueue_execution(probe_workqueue_execution);
+	ret = register_trace_workqueue_handler_entry(probe_workqueue_entry);
 	if (ret)
 		goto no_insertion;
 
 	ret = register_trace_workqueue_creation(probe_workqueue_creation);
 	if (ret)
-		goto no_execution;
+		goto no_handler_entry;
 
 	ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
 	if (ret)
@@ -394,8 +394,8 @@ int __init trace_workqueue_early_init(void)
 
 no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
-no_execution:
-	unregister_trace_workqueue_execution(probe_workqueue_execution);
+no_handler_entry:
+	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
 no_insertion:
 	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
 out:
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0668795..fbcdfa2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -279,7 +279,6 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		 */
 		struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
-		trace_workqueue_execution(cwq->thread, work);
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
 		spin_unlock_irq(&cwq->lock);
@@ -288,7 +287,9 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		work_clear_pending(work);
 		lock_map_acquire(&cwq->wq->lockdep_map);
 		lock_map_acquire(&lockdep_map);
+		trace_workqueue_handler_entry(cwq->thread, work);
 		f(work);
+		trace_workqueue_handler_exit(cwq->thread, work);
 		lock_map_release(&lockdep_map);
 		lock_map_release(&cwq->wq->lockdep_map);
 
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 2/4] workqueue_tracepoint: Add workqueue_flush and worklet_cancel tracepoint
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
  2009-04-24 11:43                               ` [PATCH 1/4] workqueue_tracepoint: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry Zhaolei
@ 2009-04-24 11:45                               ` Zhaolei
  2009-04-24 11:45                               ` [PATCH 3/4] workqueue_tracepoint: Change tracepoint name to fit worklet and workqueue lifecycle Zhaolei
                                                 ` (2 subsequent siblings)
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-24 11:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton

Ingo Molnar <mingo@elte.hu> wrote:
> This allows the understanding of the life cycle of a workqueue and
> of worklets that enter that workqueue.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 include/trace/events/workqueue.h |   36 ++++++++++++++++++++++++++++++++++++
 kernel/workqueue.c               |    4 ++++
 2 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 36f4939..2e77e84 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -76,6 +76,23 @@ TRACE_EVENT(workqueue_handler_exit,
 		  __entry->thread_pid, __entry->work)
 );
 
+TRACE_EVENT(worklet_cancel,
+
+	TP_PROTO(struct work_struct *work),
+
+	TP_ARGS(work),
+
+	TP_STRUCT__entry(
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		__entry->func	= work->func;
+	),
+
+	TP_printk("func=%pF", __entry->func)
+);
+
 /* Trace the creation of one workqueue thread on a cpu */
 TRACE_EVENT(workqueue_creation,
 
@@ -99,6 +116,25 @@ TRACE_EVENT(workqueue_creation,
 		__entry->thread_pid, __entry->cpu)
 );
 
+TRACE_EVENT(workqueue_flush,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
 TRACE_EVENT(workqueue_destruction,
 
 	TP_PROTO(struct task_struct *wq_thread),
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fbcdfa2..99a81ba 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -366,6 +366,8 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 	int active = 0;
 	struct wq_barrier barr;
 
+	trace_workqueue_flush(cwq->thread);
+
 	WARN_ON(cwq->thread == current);
 
 	spin_lock_irq(&cwq->lock);
@@ -542,6 +544,8 @@ static int __cancel_work_timer(struct work_struct *work,
 {
 	int ret;
 
+	trace_worklet_cancel(work);
+
 	do {
 		ret = (timer && likely(del_timer(timer)));
 		if (!ret)
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 3/4] workqueue_tracepoint: Change tracepoint name to fit worklet and workqueue lifecycle
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
  2009-04-24 11:43                               ` [PATCH 1/4] workqueue_tracepoint: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry Zhaolei
  2009-04-24 11:45                               ` [PATCH 2/4] workqueue_tracepoint: Add workqueue_flush and worklet_cancel tracepoint Zhaolei
@ 2009-04-24 11:45                               ` Zhaolei
  2009-04-24 11:46                               ` [PATCH 4/4] workqueue_trace: Separate worklet_insertion into worklet_enqueue and worklet_enqueue_delayed Zhaolei
  2009-04-24 20:06                               ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Andrew Morton
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-24 11:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton

Change list:
  workqueue_insertion->worklet_insertion
  workqueue_handler_entry->worklet_execute
  workqueue_handler_exit->worklet_complete

Tracepoint list after change:
  worklet_insertion
  worklet_execute
  worklet_complete
  worklet_cancel
  workqueue_creation
  workqueue_flush
  workqueue_destruction

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 include/trace/events/workqueue.h |    6 +++---
 kernel/trace/trace_workqueue.c   |   21 ++++++++++-----------
 kernel/workqueue.c               |    6 +++---
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 2e77e84..fb35cc7 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -8,7 +8,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM workqueue
 
-TRACE_EVENT(workqueue_insertion,
+TRACE_EVENT(worklet_insertion,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -30,7 +30,7 @@ TRACE_EVENT(workqueue_insertion,
 		__entry->thread_pid, __entry->func)
 );
 
-TRACE_EVENT(workqueue_handler_entry,
+TRACE_EVENT(worklet_execute,
 
 	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
 
@@ -55,7 +55,7 @@ TRACE_EVENT(workqueue_handler_entry,
 );
 
 /* Declare work as void *, because we can't use work->... in after f(work) */
-TRACE_EVENT(workqueue_handler_exit,
+TRACE_EVENT(worklet_complete,
 	TP_PROTO(struct task_struct *wq_thread, void *work),
 
 	TP_ARGS(wq_thread, work),
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 8d26db6..e3e1218 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -57,8 +57,8 @@ static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
  * Update record when insert a work into workqueue
  * Caller need to hold cpu_workqueue_stats spin_lock
  */
-int do_workqueue_insertion(struct cpu_workqueue_stats *cws,
-			   struct work_struct *work)
+int do_worklet_insertion(struct cpu_workqueue_stats *cws,
+			 struct work_struct *work)
 {
 	struct workfunc_stats *wfstat;
 
@@ -87,7 +87,7 @@ found:
 
 /* Insertion of a work */
 static void
-probe_workqueue_insertion(struct task_struct *wq_thread,
+probe_worklet_insertion(struct task_struct *wq_thread,
 			  struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -97,8 +97,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
-			/* we ignore error of do_workqueue_insertion */
-			do_workqueue_insertion(node, work);
+			/* we ignore error of do_worklet_insertion */
+			do_worklet_insertion(node, work);
 			goto found;
 		}
 	}
@@ -109,8 +109,7 @@ found:
 
 /* Execution of a work */
 static void
-probe_workqueue_entry(struct task_struct *wq_thread,
-			  struct work_struct *work)
+probe_worklet_execute(struct task_struct *wq_thread, struct work_struct *work)
 {
 	int cpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node;
@@ -369,11 +368,11 @@ int __init trace_workqueue_early_init(void)
 {
 	int ret, cpu;
 
-	ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
+	ret = register_trace_worklet_insertion(probe_worklet_insertion);
 	if (ret)
 		goto out;
 
-	ret = register_trace_workqueue_handler_entry(probe_workqueue_entry);
+	ret = register_trace_worklet_execute(probe_worklet_execute);
 	if (ret)
 		goto no_insertion;
 
@@ -395,9 +394,9 @@ int __init trace_workqueue_early_init(void)
 no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
 no_handler_entry:
-	unregister_trace_workqueue_handler_entry(probe_workqueue_entry);
+	unregister_trace_worklet_execute(probe_worklet_execute);
 no_insertion:
-	unregister_trace_workqueue_insertion(probe_workqueue_insertion);
+	unregister_trace_worklet_insertion(probe_worklet_insertion);
 out:
 	pr_warning("trace_workqueue: unable to trace workqueues\n");
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 99a81ba..a3c9848 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -128,7 +128,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
-	trace_workqueue_insertion(cwq->thread, work);
+	trace_worklet_insertion(cwq->thread, work);
 
 	set_wq_data(work, cwq);
 	/*
@@ -287,9 +287,9 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		work_clear_pending(work);
 		lock_map_acquire(&cwq->wq->lockdep_map);
 		lock_map_acquire(&lockdep_map);
-		trace_workqueue_handler_entry(cwq->thread, work);
+		trace_worklet_execute(cwq->thread, work);
 		f(work);
-		trace_workqueue_handler_exit(cwq->thread, work);
+		trace_worklet_complete(cwq->thread, work);
 		lock_map_release(&lockdep_map);
 		lock_map_release(&cwq->wq->lockdep_map);
 
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* [PATCH 4/4] workqueue_trace: Separate worklet_insertion into worklet_enqueue and worklet_enqueue_delayed
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
                                                 ` (2 preceding siblings ...)
  2009-04-24 11:45                               ` [PATCH 3/4] workqueue_tracepoint: Change tracepoint name to fit worklet and workqueue lifecycle Zhaolei
@ 2009-04-24 11:46                               ` Zhaolei
  2009-04-24 20:06                               ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Andrew Morton
  4 siblings, 0 replies; 94+ messages in thread
From: Zhaolei @ 2009-04-24 11:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: KOSAKI Motohiro, Frederic Weisbecker, Steven Rostedt,
	Tom Zanussi, linux-kernel, Oleg Nesterov, Andrew Morton

To get more information about delayed work.
Suggested by Ingo Molnar <mingo@elte.hu>
> i'd suggest the following complete set of events instead:
> TRACE_EVENT(worklet_enqueue              /* NEW */
> TRACE_EVENT(worklet_enqueue_delayed      /* NEW */
> i'd suggest a 'cpu' parameter to the enqueue events, to allow the
> mapping of the _on(..cpu) variants too.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 include/trace/events/workqueue.h |   54 ++++++++++++++++++++++++++++++++-----
 kernel/trace/trace_workqueue.c   |   41 +++++++++++++++++++---------
 kernel/workqueue.c               |   19 ++++++++++---
 3 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index fb35cc7..e4c74f2 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -8,26 +8,64 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM workqueue
 
-TRACE_EVENT(worklet_insertion,
+TRACE_EVENT(worklet_enqueue,
 
-	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+	TP_PROTO(
+		struct task_struct *wq_thread,
+		struct work_struct *work,
+		int cpu
+	),
 
-	TP_ARGS(wq_thread, work),
+	TP_ARGS(wq_thread, work, cpu),
 
 	TP_STRUCT__entry(
-		__array(char,		thread_comm,	TASK_COMM_LEN)
-		__field(pid_t,		thread_pid)
-		__field(work_func_t,	func)
+		__array(char,			thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(work_func_t,		func)
+		__field(int,			cpu)
 	),
 
 	TP_fast_assign(
 		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
 		__entry->thread_pid	= wq_thread->pid;
 		__entry->func		= work->func;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d func=%pF cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->func, __entry->cpu)
+);
+
+TRACE_EVENT(worklet_enqueue_delayed,
+
+	TP_PROTO(
+		struct task_struct *wq_thread,
+		struct work_struct *work,
+		int cpu,
+		unsigned long delay
+	),
+
+	TP_ARGS(wq_thread, work, cpu, delay),
+
+	TP_STRUCT__entry(
+		__array(char,			thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,			thread_pid)
+		__field(work_func_t,		func)
+		__field(int,			cpu)
+		__field(unsigned long,		delay)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+		__entry->cpu		= cpu;
+		__entry->delay		= delay;
 	),
 
-	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
-		__entry->thread_pid, __entry->func)
+	TP_printk("thread=%s:%d func=%pF cpu=%d delay=%lu",
+		__entry->thread_comm, __entry->thread_pid, __entry->func,
+		__entry->cpu, __entry->delay)
 );
 
 TRACE_EVENT(worklet_execute,
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index e3e1218..69ae4aa 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
  * Update record when insert a work into workqueue
  * Caller need to hold cpu_workqueue_stats spin_lock
  */
-int do_worklet_insertion(struct cpu_workqueue_stats *cws,
+int do_worklet_enqueue(struct cpu_workqueue_stats *cws,
 			 struct work_struct *work)
 {
 	struct workfunc_stats *wfstat;
@@ -85,26 +85,34 @@ found:
 	return 0;
 }
 
-/* Insertion of a work */
+/* Enqueue of a work */
 static void
-probe_worklet_insertion(struct task_struct *wq_thread,
-			  struct work_struct *work)
+probe_worklet_enqueue(struct task_struct *wq_thread, struct work_struct *work,
+		      int cpu)
 {
-	int cpu = cpumask_first(&wq_thread->cpus_allowed);
+	int wqcpu = cpumask_first(&wq_thread->cpus_allowed);
 	struct cpu_workqueue_stats *node;
 	unsigned long flags;
 
-	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-	list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
+	spin_lock_irqsave(&workqueue_cpu_stat(wqcpu)->lock, flags);
+	list_for_each_entry(node, &workqueue_cpu_stat(wqcpu)->list, list) {
 		if (node->pid == wq_thread->pid) {
 			/* we ignore error of do_worklet_insertion */
-			do_worklet_insertion(node, work);
+			do_worklet_enqueue(node, work);
 			goto found;
 		}
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(wqcpu)->lock, flags);
+}
+
+static void
+probe_worklet_enqueue_delayed(struct task_struct *wq_thread,
+			      struct work_struct *work, int cpu,
+			      unsigned long delay)
+{
+	probe_worklet_enqueue(wq_thread, work, cpu);
 }
 
 /* Execution of a work */
@@ -368,13 +376,18 @@ int __init trace_workqueue_early_init(void)
 {
 	int ret, cpu;
 
-	ret = register_trace_worklet_insertion(probe_worklet_insertion);
+	ret = register_trace_worklet_enqueue(probe_worklet_enqueue);
 	if (ret)
 		goto out;
 
+	ret = register_trace_worklet_enqueue_delayed(
+		probe_worklet_enqueue_delayed);
+	if (ret)
+		goto no_enqueue;
+
 	ret = register_trace_worklet_execute(probe_worklet_execute);
 	if (ret)
-		goto no_insertion;
+		goto no_enqueue_delayed;
 
 	ret = register_trace_workqueue_creation(probe_workqueue_creation);
 	if (ret)
@@ -395,8 +408,10 @@ no_creation:
 	unregister_trace_workqueue_creation(probe_workqueue_creation);
 no_handler_entry:
 	unregister_trace_worklet_execute(probe_worklet_execute);
-no_insertion:
-	unregister_trace_worklet_insertion(probe_worklet_insertion);
+no_enqueue_delayed:
+	unregister_trace_worklet_enqueue_delayed(probe_worklet_enqueue_delayed);
+no_enqueue:
+	unregister_trace_worklet_enqueue(probe_worklet_enqueue);
 out:
 	pr_warning("trace_workqueue: unable to trace workqueues\n");
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a3c9848..0cc14b9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -128,8 +128,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
-	trace_worklet_insertion(cwq->thread, work);
-
 	set_wq_data(work, cwq);
 	/*
 	 * Ensure that we get the right work->data if we see the
@@ -188,8 +186,12 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
 	int ret = 0;
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+		struct cpu_workqueue_struct *cwq;
+
 		BUG_ON(!list_empty(&work->entry));
-		__queue_work(wq_per_cpu(wq, cpu), work);
+		cwq = wq_per_cpu(wq, cpu);
+		__queue_work(cwq, work);
+		trace_worklet_enqueue(cwq->thread, work, cpu);
 		ret = 1;
 	}
 	return ret;
@@ -251,10 +253,17 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		timer->data = (unsigned long)dwork;
 		timer->function = delayed_work_timer_fn;
 
-		if (unlikely(cpu >= 0))
+		if (unlikely(cpu >= 0)) {
 			add_timer_on(timer, cpu);
-		else
+			trace_worklet_enqueue_delayed(
+				wq_per_cpu(wq, cpu)->thread, work, cpu, delay);
+		} else {
 			add_timer(timer);
+			trace_worklet_enqueue_delayed(
+				wq_per_cpu(wq, raw_smp_processor_id())->thread,
+				work, cpu, delay);
+		}
+
 		ret = 1;
 	}
 	return ret;
-- 
1.5.5.3



^ permalink raw reply related	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
                                                 ` (3 preceding siblings ...)
  2009-04-24 11:46                               ` [PATCH 4/4] workqueue_trace: Separate worklet_insertion into worklet_enqueue and worklet_enqueue_delayed Zhaolei
@ 2009-04-24 20:06                               ` Andrew Morton
  2009-04-24 22:59                                 ` Frederic Weisbecker
  2009-04-28 17:24                                 ` Frank Ch. Eigler
  4 siblings, 2 replies; 94+ messages in thread
From: Andrew Morton @ 2009-04-24 20:06 UTC (permalink / raw)
  To: Zhaolei
  Cc: mingo, kosaki.motohiro, fweisbec, rostedt, tzanussi, linux-kernel, oleg

On Fri, 24 Apr 2009 19:42:19 +0800
Zhaolei <zhaolei@cn.fujitsu.com> wrote:

> These patchs add tracepoints for per-worklet tracing.
> Now we have enough tracepoints to start makeing trace_workqueue.c support
> worklet time mesurement.

I'm not seeing anywhere in this patchset a description of the user
interface.  What does the operator/programmer actually get to see from
the kernel as a result of these changes?

A complete usage example would be an appropriate way of communicating
all of this.

The patches introduce a moderate amount of tracing-specific hooks into
the core workqueue code, which inevitably increases the maintenance
load for that code.  It is important that it be demonstrated that the
benefits of the code are worth that cost.  Hence it is important that
these benefits be demonstrated to us, by yourself.  Please.

Another way of looking at it: which previously-encountered problems
would this facility have helped us to solve?  How will this facility
help us to solve problems in the future?  Looking at this patch series
I cannot answer those questions!



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 20:06                               ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Andrew Morton
@ 2009-04-24 22:59                                 ` Frederic Weisbecker
  2009-04-24 23:20                                   ` Andrew Morton
  2009-04-24 23:27                                   ` Frederic Weisbecker
  2009-04-28 17:24                                 ` Frank Ch. Eigler
  1 sibling, 2 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-24 22:59 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Zhaolei, mingo, kosaki.motohiro, rostedt, tzanussi, linux-kernel, oleg

On Fri, Apr 24, 2009 at 01:06:16PM -0700, Andrew Morton wrote:
> On Fri, 24 Apr 2009 19:42:19 +0800
> Zhaolei <zhaolei@cn.fujitsu.com> wrote:
> 
> > These patchs add tracepoints for per-worklet tracing.
> > Now we have enough tracepoints to start makeing trace_workqueue.c support
> > worklet time mesurement.
> 
> I'm not seing anywhere in this patchset a description of the user
> interface.  What does the operator/programmer actually get to see from
> the kernel as a result of these changes?
> 
> A complete usage example would be an appropriate way of communicating
> all of this.
> 
> The patches introduce a moderate amount of tracing-specific hooks into
> the core workqueue code, which inevitably increases the maintenance
> load for that code.  It is important that it be demonstrated that the
> benefts of the code are worth that cost.  Hence it is important that
> these benefits be demonstrated to us, by yourself.  Please.
> 
> Another way of looking at it: which previously-encountered problems
> would this facility have helped us to solve?  How will this facility
> help us to solve problems in the future?  Looking at this patch series
> I cannot answer those questions!
> 


Hi Andrew,

Although I'm not the author of this patchset, I'm somewhat
involved in the workqueue tracer and I would like to express
my opinion on what is happening.

Until recently, the workqueue tracer was a basic machine.
It was designed to trace the workqueue level. We were not yet
thinking about the worklet level:

- creation of a workqueue thread
- cleanup/destruction of these threads
- insertion of a work in a workqueue
- execution of this work

The output looked like such following histogram:

#   CPU   INSERTED  EXECUTED   WORKQUEUE THREAD
#
      1    125        125       reiserfs/1
      1      0          0       scsi_tgtd/1
      1      0          0       aio/1
      1      0          0       ata/1
      1    114        114       kblockd/1
      1      0          0       kintegrityd/1
      1   2147       2147       events/1
    
      0      0          0       kpsmoused
      0    105        105       reiserfs/0
      0      0          0       scsi_tgtd/0
      0      0          0       aio/0
      0      0          0       ata_aux
      0      0          0       ata/0
      0      0          0       cqueue
      0      0          0       kacpi_notify
      0      0          0       kacpid
      0    149        149       kblockd/0
      0      0          0       kintegrityd/0
      0   1000       1000       khelper
      0   2270       2270       events/0


Its purpose and the information it gave were limited, though
somewhat useful.
It was able to provide some information about the frequency
of the works inserted.

Why is that? Because it provided some useful information to help a
kernel developer when it's time to decide how to defer async jobs.
Should he use the traditional, general-purpose events workqueues
or a dedicated workqueue?

The decision has to be made relatively to:

- the workload it induces. If we insert too many
  works at a time, we can create unacceptable latencies
  that may delay other queued works. The events/%d threads are
  especially fragile in this regard.

- The workload is not only about work insertion frequency
  but also about how much time is spent to process a work.
  Could it be trying to perform too much things which involve
  too much cpu time? Could it sleep for too much time, waiting
  for an event, a very contended lock or something?

- Another kind of reasons that may encumber the events and
  create bad latencies.

After reading this list of requirements, I guess you noticed
that capturing the frequency of inserted events is only
one variable that composes the workload recipe.
Not only does it give incomplete information, but it also
provides misleading data to a user, who might focus
on these frequencies when they become large, whereas a single work
can create more burden than hundreds of others.


Then, Zhaolei and Kosaki Motohiro came up recently with a good
update for this tracer by posting a patchset which integrates
the per-worklet tracing dimension.
And the output on my laptop became the following:

$ cat /debug/tracing/trace_stat/workqueues 
# CPU  INSERTED  EXECUTED   TASKNAME:PID
# |      |         |        `-WORKFUNC
# |      |         |          |
  0   3822       3898       events/0:9
  0      1          1        |-hpet_work+0x0/0x340
  0     47         46        |-rt_worker_func+0x0/0x400
  0   2655       2654        |-vmstat_update+0x0/0x40
  0      1          1        |-power_supply_changed_work+0x0/0x60
  0    212        212        |-fb_flashcursor+0x0/0x3c0
  0     80         80        |-test_work+0x0/0x64
  0     10          9        |-rekey_seq_generator+0x0/0x70
  0      8          7        |-lru_add_drain_per_cpu+0x0/0x10
  0    455        455        |-console_callback+0x0/0x1e0
  0    249        249        |-flush_to_ldisc+0x0/0x2f0
  0      1          1        |-atkbd_event_work+0x0/0xb0
  0    103         96        `-dst_gc_task+0x0/0x340
  0      0          0       cpuset:11
  0   1378       1378       khelper:12
  0   1378       1378        `-__call_usermodehelper+0x0/0xe0
  0      0          0       kintegrityd/0:178
  0    333        333       kblockd/0:184
  0    233        233        |-blk_unplug_work+0x0/0x110
  0    100        100        `-cfq_kick_queue+0x0/0x50
  0      0          0       kacpid:190
  0      0          0       kacpi_notify:191
  0      0          0       tifm:498
  0   8048       8048       ata/0:507
  0   8048       8048        `-ata_pio_task+0x0/0x280
  0      0          0       ata_aux:511
  0      0          0       aio/0:871
  0      0          0       crypto/0:897
  0      0          0       scsi_tgtd/0:2368
  0      0          0       iscsi_eh:2387
  0      0          0       kpsmoused:2521
  0      0          0       hd-audio0:2660
  0      0          0       krxrpcd/0:2745
  0     59         59       reiserfs/0:2895
  0     59         59        `-flush_async_commits+0x0/0x90
  1   5480       5503       events/1:10
  1      1          1        |-hpet_work+0x0/0x340
  1   2819       2818        |-vmstat_update+0x0/0x40
  1    464        464        |-console_callback+0x0/0x1e0
  1     80         80        |-test_work+0x0/0x64
  1      1          1        |-sysfs_add_func+0x0/0xb0
  1      8          8        |-lru_add_drain_per_cpu+0x0/0x10
  1      1          1        |-sync_cmos_clock+0x0/0x140
  1   1823       1823        |-fb_flashcursor+0x0/0x3c0
  1    282        282        |-flush_to_ldisc+0x0/0x2f0
  1      1          1        `-atkbd_event_work+0x0/0xb0
  1      0          0       kintegrityd/1:181
  1    254        254       kblockd/1:185
  1    148        148        |-blk_unplug_work+0x0/0x110
  1    106        106        `-cfq_kick_queue+0x0/0x50
  1  10988      10988       ata/1:509
  1  10988      10988        `-ata_pio_task+0x0/0x280
  1      0          0       aio/1:878
  1      0          0       crypto/1:898
  1      0          0       scsi_tgtd/1:2370
  1      0          0       krxrpcd/1:2746
  1     25         25       reiserfs/1:2896
  1     25         25        `-flush_async_commits+0x0/0x90


The trace has evolved by decomposing these frequency stats
per worklet. Not only does it help a driver writer to
instrument his own async jobs, but it can also help
any developer to find the source of a latency
originating from a workqueue.

It can at least show a first direction of investigation, because
we are still only talking about frequency.
And speaking about incomplete informations, this output is only
a step before the main goal which is to provide the average and the
maximum time of execution for each worklet.


# CPU  INSERTED  EXECUTED  AVG TIME  MAX TIME	 TASKNAME:PID
# |      |         |          |         |        `-WORKFUNC
# |      |         |          |         |          |
  0   3822       3898        100 us    200us       events/0:9
  0      1          1        12 us     15us        |-hpet_work+0x0/0x340
  0     47         46        10 us     45us        |-rt_worker_func+0x0/0x400


With that we'll have much more indications to start a latency investigation.
But to get this end result, we needed the changes done on this latest patchset
from Zhaolei and Kosaki which, in the middle of other useful changes, turns
the workqueue_execution event into a new dichotomous capture with:

- trace_work_execute
- trace_work_complete

It was a required piece to perform the time of execution measurement.

Also, as I said, the statistical/histogram output is only the first step
of any worklet instrumentation. Once we focus on a specific culprit,
we can trace its whole lifecycle using the trace events:
insertion, execution, completion, delayed insertion, cancelling, and so on.

All these events provide the required fine-grained instrumentation
to learn any worklet's lifecycle details. Combined with well-chosen event
toggling and event field filtering, we can focus on only the relevant
events at a given time. 

Example of such trace:

# cd /debug/tracing

# ls events/workqueue/
filter          worklet_complete  worklet_enqueue_delayed  workqueue_creation     workqueue_flush
worklet_cancel  worklet_enqueue   worklet_execute          workqueue_destruction

# for e in $(ls events/workqueue); do echo 1 > events/workqueue/$e/enable; done

# tail -f trace

 events/1-10    [001]  4687.187314: worklet_execute: thread=events/1:10 work=ffff880001b27200 func=vmstat_update+0x0/0x40
 events/1-10    [001]  4687.187330: worklet_enqueue_delayed: thread=events/1:10 func=vmstat_update+0x0/0x40 cpu=-1 delay=250
 events/1-10    [001]  4687.187331: worklet_complete: thread=events/1:10 work=ffff880001b27200
 events/0-9     [000]  4687.250236: worklet_execute: thread=events/0:9 work=ffff880001b0c200 func=vmstat_update+0x0/0x40
 events/0-9     [000]  4687.250250: worklet_enqueue_delayed: thread=events/0:9 func=vmstat_update+0x0/0x40 cpu=-1 delay=237
 events/0-9     [000]  4687.250251: worklet_complete: thread=events/0:9 work=ffff880001b0c200

If more details are needed, such events could fit nicely inside with a
function graph tracer output.

So this latest patchset provides all of this required information at the events
tracing level. The next step is likely to be on the stat tracing side, to provide
the avg/max time of execution.


Thanks,
Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 22:59                                 ` Frederic Weisbecker
@ 2009-04-24 23:20                                   ` Andrew Morton
  2009-04-25  0:37                                     ` Frederic Weisbecker
  2009-04-24 23:27                                   ` Frederic Weisbecker
  1 sibling, 1 reply; 94+ messages in thread
From: Andrew Morton @ 2009-04-24 23:20 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: zhaolei, mingo, kosaki.motohiro, rostedt, tzanussi, linux-kernel, oleg

On Sat, 25 Apr 2009 00:59:10 +0200
Frederic Weisbecker <fweisbec@gmail.com> wrote:

> 
> [useful info]
>

OK, thanks.  It was waaaay more useful than the original description.

> So this latest patchset provides all these required informations on the events
> tracing level.

Well.. required by who?

I don't recall ever seeing any problems of this nature, nor patches to
solve any such problems.  If someone wants to get down and optimise our use
of workqueues then good for them, but that exercise doesn't require the
permanent addition of large amounts of code to the kernel.

The same amount of additional code and additional churn could be added
to probably tens of core kernel subsystems, but what _point_ is there
to all this?  Who is using it, what problems are they solving?

We keep on adding all these fancy debug gizmos to the core kernel which look
like they will be used by one person, once.  If that!

> The next step is likely to be on the stat tracing to provide the avg/max time
> of execution.

eek.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 22:59                                 ` Frederic Weisbecker
  2009-04-24 23:20                                   ` Andrew Morton
@ 2009-04-24 23:27                                   ` Frederic Weisbecker
  1 sibling, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-24 23:27 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Zhaolei, mingo, kosaki.motohiro, rostedt, tzanussi, linux-kernel, oleg

On Sat, Apr 25, 2009 at 12:59:10AM +0200, Frederic Weisbecker wrote:
> On Fri, Apr 24, 2009 at 01:06:16PM -0700, Andrew Morton wrote:
> > On Fri, 24 Apr 2009 19:42:19 +0800
> > Zhaolei <zhaolei@cn.fujitsu.com> wrote:
> > 
> > > These patchs add tracepoints for per-worklet tracing.
> > > Now we have enough tracepoints to start makeing trace_workqueue.c support
> > > worklet time mesurement.
> > 
> > I'm not seing anywhere in this patchset a description of the user
> > interface.  What does the operator/programmer actually get to see from
> > the kernel as a result of these changes?
> > 
> > A complete usage example would be an appropriate way of communicating
> > all of this.
> > 
> > The patches introduce a moderate amount of tracing-specific hooks into
> > the core workqueue code, which inevitably increases the maintenance
> > load for that code.  It is important that it be demonstrated that the
> > benefts of the code are worth that cost.  Hence it is important that
> > these benefits be demonstrated to us, by yourself.  Please.
> > 
> > Another way of looking at it: which previously-encountered problems
> > would this facility have helped us to solve?  How will this facility
> > help us to solve problems in the future?  Looking at this patch series
> > I cannot answer those questions!
> > 
> 
> 
> Hi Andrew,
> 
> Although I'm not the author of this patchset, I'm somewhat
> involved in the workqueue tracer and I would like to express
> my opinion on what is happening.
> 
> Until recently, the workqueue tracer was a basic machine.
> It was designed to trace the workqueue level. We were not yet
> thinking about the worklet level:
> 
> - creation of a workqueue thread
> - cleanup/destruction of these threads
> - insertion of a work in a workqueue
> - execution of this work
> 
> The output looked like such following histogram:
> 
> #   CPU   INSERTED  EXECUTED   WORKQUEUE THREAD
> #
>       1    125        125       reiserfs/1
>       1      0          0       scsi_tgtd/1
>       1      0          0       aio/1
>       1      0          0       ata/1
>       1    114        114       kblockd/1
>       1      0          0       kintegrityd/1
>       1   2147       2147       events/1
>     
>       0      0          0       kpsmoused
>       0    105        105       reiserfs/0
>       0      0          0       scsi_tgtd/0
>       0      0          0       aio/0
>       0      0          0       ata_aux
>       0      0          0       ata/0
>       0      0          0       cqueue
>       0      0          0       kacpi_notify
>       0      0          0       kacpid
>       0    149        149       kblockd/0
>       0      0          0       kintegrityd/0
>       0   1000       1000       khelper
>       0   2270       2270       events/0
> 
> 
> Its purpose and the information it gave was limited, though
> somehow useful.
> It was able to provide some informations about the frequency
> of the works inserted.
> 
> Why that? Because it pushed some useful informations to help a
> kernel developer when it's time to decide how to defer async jobs.
> Should he use the traditional and whole purposed events workqueues
> or a devoted workqueue?
> 

Moreover, I also think that such async job behaviour instrumentation
may help to choose more carefully the best facility for
the right work, especially since the kernel has gained more infrastructure
in that field:

- kevent/%d
- private workqueue
- async job
- slow work

Frederic.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 23:20                                   ` Andrew Morton
@ 2009-04-25  0:37                                     ` Frederic Weisbecker
  2009-04-25  1:28                                       ` Andrew Morton
  0 siblings, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-25  0:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: zhaolei, mingo, kosaki.motohiro, rostedt, tzanussi, linux-kernel, oleg

On Fri, Apr 24, 2009 at 04:20:56PM -0700, Andrew Morton wrote:
> On Sat, 25 Apr 2009 00:59:10 +0200
> Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > 
> > [useful info]
> >
> 
> OK, thanks.  It was waaaay more useful than the original description.
> 
> > So this latest patchset provides all these required informations on the events
> > tracing level.
> 
> Well.. required by who?
> 
> I don't recall ever seeing any problems of this nature, nor patches to
> solve any such problems.  If someone wants to get down and optimise our use
> of workqueues then good for them, but that exercise doesn't require the
> permanent addition of large amounts of code to the kernel.
> 
> The same amount of additional code and additional churn could be added
> to probably tens of core kernel subsystems, but what _point_ is there
> to all this?  Who is using it, what problems are they solving?
> 
> We keep on adding all these fancy debug gizmos to the core kernel which look
> like they will be used by one person, once.  If that!
> 


If you don't recall problems revealed by this tracer or patches
motivated by its results, I have examples.

Soon after writing the first version of the workqueue tracer, I saw
this incredible rate of unused workqueues on my box.
Note that's still the case today (I've cut the worklet informations):

# cat trace_stat/workqueues 
# CPU  INSERTED  EXECUTED   TASKNAME:PID
# |      |         |        `-WORKFUNC
# |      |         |          |
  0   9587       9650       events/0:9
  0      0          0       cpuset:11
  0   1384       1384       khelper:12
  0      0          0       kintegrityd/0:178
  0    385        385       kblockd/0:184
  0      0          0       kacpid:190
  0      0          0       kacpi_notify:191
  0      0          0       tifm:498
  0  24061      24061       ata/0:507
  0      0          0       ata_aux:511
  0      0          0       aio/0:871
  0      0          0       crypto/0:897
  0      0          0       scsi_tgtd/0:2368
  0      0          0       iscsi_eh:2387
  0      0          0       kpsmoused:2521
  0      0          0       hd-audio0:2660
  0      0          0       krxrpcd/0:2745
  0    147        147       reiserfs/0:2895
  1  11023      11046       events/1:10
  1      0          0       kintegrityd/1:181
  1    293        293       kblockd/1:185
  1  33783      33783       ata/1:509
  1      0          0       aio/1:878
  1      0          0       crypto/1:898
  1      0          0       scsi_tgtd/1:2370
  1      0          0       krxrpcd/1:2746
  1     99         99       reiserfs/1:2896


27 threads. 15 are unused, 12 are used.

And still, I have only two CPUs.
More than half of my workqueues are unused.
I also have a small config. I can't imagine the result
with a distro config which enables a lot of options
by default.

I discovered it with this tracer. Then it brought me to
write this patch:

http://lkml.org/lkml/2009/1/31/184

Btw, if one day I can see something like async but which is able
to accept work that _must_ be run async in any case, then I'm still willing
to rework this patch.

Still based on these same observations, I wrote another one:

http://lkml.org/lkml/2009/1/26/363

Without this tracer, I wouldn't have had the information that
motivated me to write these patches. And although they weren't accepted
(the first is more pending than refused, my bad), which is
justified, they reveal a real problem.


Who knows what kind of other things we can discover with worklet tracing?

Frederic.



^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  0:37                                     ` Frederic Weisbecker
@ 2009-04-25  1:28                                       ` Andrew Morton
  2009-04-25  2:00                                         ` Steven Rostedt
  2009-04-26 10:47                                         ` Ingo Molnar
  0 siblings, 2 replies; 94+ messages in thread
From: Andrew Morton @ 2009-04-25  1:28 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: zhaolei, mingo, kosaki.motohiro, rostedt, tzanussi, linux-kernel, oleg

On Sat, 25 Apr 2009 02:37:03 +0200
Frederic Weisbecker <fweisbec@gmail.com> wrote:

> I discovered it with this tracer. Then it brought me to
> write this patch:
> 
> http://lkml.org/lkml/2009/1/31/184
> 
> ...
> 
> Still with these same observations, I wrote this another one:
> 
> http://lkml.org/lkml/2009/1/26/363

OK, it's great that you're working to improve the workqueue code.  But
does this justify permanently adding debug code to the core workqueue
code?  In fact, because you've discovered these problem, the reasons
for adding the debug code have lessened!

What we need are curious developers looking into how well subsystems
are performing and how well callers are using them.  Adding fairly
large amounts of permanent debug code into the core subsystems is a
peculiar way of encouraging such activity.

If a developer is motivated to improve (say) workqueues then they will
write a bit of ad-hoc code, or poke at it with systemtap or will
maintain a private ftrace patch - that's all pretty simple stuff for
such people.

So what is the remaining case for adding these patches?  What I see is

a) that their presence will entice people to run them and maybe find
   some problems and

b) the workqueue-maintainer's task is lessened a bit by not having
   to forward-port his debugging patch.

I dunno, it all seems a bit thin to me.  Especially when you multiply
it all by nr_core_subsystems?


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  1:28                                       ` Andrew Morton
@ 2009-04-25  2:00                                         ` Steven Rostedt
  2009-04-25  2:24                                           ` Andrew Morton
  2009-04-26 10:47                                         ` Ingo Molnar
  1 sibling, 1 reply; 94+ messages in thread
From: Steven Rostedt @ 2009-04-25  2:00 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, zhaolei, mingo, kosaki.motohiro, tzanussi,
	linux-kernel, oleg


On Fri, 24 Apr 2009, Andrew Morton wrote:

> On Sat, 25 Apr 2009 02:37:03 +0200
> Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > I discovered it with this tracer. Then it brought me to
> > write this patch:
> > 
> > http://lkml.org/lkml/2009/1/31/184
> > 
> > ...
> > 
> > Still with these same observations, I wrote this another one:
> > 
> > http://lkml.org/lkml/2009/1/26/363
> 
> OK, it's great that you're working to improve the workqueue code.  But
> does this justify permanently adding debug code to the core workqueue
> code?  In fact, because you've discovered these problem, the reasons
> for adding the debug code have lessened!
> 
> What we need are curious developers looking into how well subsystems
> are performing and how well callers are using them.  Adding fairly
> large amounts of permanent debug code into the core subsystems is a
> peculiar way of encouraging such activity.
> 
> If a developer is motivated to improve (say) workqueues then they will
> write a bit of ad-hoc code, or poke at it with systemtap or will
> maintain a private ftrace patch - that's all pretty simple stuff for
> such people.
> 
> So what is the remaining case for adding these patches?  What I see is
> 
> a) that their presence will entice people to run them and maybe find
>    some problems and
> 
> b) the workqueue-maintainer's task is lessened a bit by not having
>    to forward-port his debugging patch.
> 
> I dunno, it all seems a bit thin to me.  Especially when you multiply
> it all by nr_core_subsystems?

I agree that we need to be frugal with the addition of trace points. But 
I don't think the bugs that can be solved with this are always reproducible 
by the developer.

If you have a distribution kernel that is running at a customers location, 
you may not have the privilege of shutting down that kernel, patching the 
code, recompiling and booting up this temporary kernel. It would be nice 
to have strategic locations in the kernel where we can easily enable a 
trace point and monitor what is going on.

If the customer calls and tells you there's some strange performance 
issues when running such and such a load, it would be nice to look at 
things like workqueues to analyze the situation.

Point being, the events are not for me on the box that runs my machines.
Hell, I had Logdev for 10 years doing that for me. But now to have 
something that is running at a customers site with extremely low overhead 
that we can enable when problems arise. That is what makes this worth 
while.

Note, when I was contracting, I even had logdev prints inside the 
production (custom) kernel that I could turn on and off. This was exactly 
for this purpose. To monitor what is happening inside the kernel when in 
the field.

-- Steve


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  2:00                                         ` Steven Rostedt
@ 2009-04-25  2:24                                           ` Andrew Morton
  2009-04-25  2:51                                             ` Steven Rostedt
  0 siblings, 1 reply; 94+ messages in thread
From: Andrew Morton @ 2009-04-25  2:24 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Frederic Weisbecker, zhaolei, mingo, kosaki.motohiro, tzanussi,
	linux-kernel, oleg

On Fri, 24 Apr 2009 22:00:20 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:

> 
> I agree that we need to be frugal with the addition of trace points. But 
> I don't think the bugs that can be solved with this is always reproducible 
> by the developer.
> 
> If you have a distribution kernel that is running at a customers location, 
> you may not have the privilege of shutting down that kernel, patching the 
> code, recompiling and booting up this temporary kernel. It would be nice 
> to have strategic locations in the kernel where we can easily enable a 
> trace point and monitor what is going on.
> 
> If the customer calls and tells you there's some strange performance 
> issues when running such and such a load, it would be nice to look at 
> things like workqueues to analyze the situation.

Would it?  What's the probability that anyone anywhere will *really*
solve an on-site problem using workqueue tracepoints?  Just one person?

I think the probability is quite small, and I doubt if it's high enough
to add permanent code to the kernel.

Plus: what we _really_ should be looking at is

p(someone uses this for something) -
	p(they could have used a kprobes-based tracer)

no?

> Point being, the events are not for me on the box that runs my machines.
> Hell, I had Logdev for 10 years doing that for me. But now to have 
> something that is running at a customers site with extremely low overhead 
> that we can enable when problems arise. That is what makes this worth 
> while.
> 
> Note, when I was contracting, I even had logdev prints inside the 
> production (custom) kernel that I could turn on and off. This was exactly 
> for this purpose. To monitor what is happening inside the kernel when in 
> the field.

We seem to be thrashing around grasping at straws which might justify
the merging of these tracing patches.  It ain't supposed to be that way.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  2:24                                           ` Andrew Morton
@ 2009-04-25  2:51                                             ` Steven Rostedt
  2009-04-25  3:10                                               ` Andrew Morton
  0 siblings, 1 reply; 94+ messages in thread
From: Steven Rostedt @ 2009-04-25  2:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, zhaolei, Ingo Molnar, kosaki.motohiro,
	tzanussi, LKML, oleg


On Fri, 24 Apr 2009, Andrew Morton wrote:

> On Fri, 24 Apr 2009 22:00:20 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > 
> > I agree that we need to be frugal with the addition of trace points. But 
> > I don't think the bugs that can be solved with this is always reproducible 
> > by the developer.
> > 
> > If you have a distribution kernel that is running at a customers location, 
> > you may not have the privilege of shutting down that kernel, patching the 
> > code, recompiling and booting up this temporary kernel. It would be nice 
> > to have strategic locations in the kernel where we can easily enable a 
> > trace point and monitor what is going on.
> > 
> > If the customer calls and tells you there's some strange performance 
> > issues when running such and such a load, it would be nice to look at 
> > things like workqueues to analyze the situation.
> 
> Would it?  What's the probability that anyone anywhere will *really*
> solve an on-site problem using workqueue tracepoints?  Just one person?
> 
> I think the probability is quite small, and I doubt if it's high enough
> to add permanent code to the kernel.
> 
> Plus: what we _really_ should be looking at is
> 
> p(someone uses this for something) -
> 	p(they could have used a kprobes-based tracer)

This is starting to sound a lot like catch 22. We don't want it in the 
kernel if nobody is using it. But nobody is using it because it is not in 
the kernel.

> 
> no?
> 
> > Point being, the events are not for me on the box that runs my machines.
> > Hell, I had Logdev for 10 years doing that for me. But now to have 
> > something that is running at a customers site with extremely low overhead 
> > that we can enable when problems arise. That is what makes this worth 
> > while.
> > 
> > Note, when I was contracting, I even had logdev prints inside the 
> > production (custom) kernel that I could turn on and off. This was exactly 
> > for this purpose. To monitor what is happening inside the kernel when in 
> > the field.
> 
> We seem to be thrashing around grasping at straws which might justify
> the merging of these tracing patches.  It ain't supposed to be that way.


Unfortunately, analyzing system behavior is a lot like grasping at straws. 
You may never know what is causing some problem unless you view the entire 
picture.

Perhaps the workqueue tracer is not by itself useful for the majority of 
people. I'm not arguing that. It comes pretty much free if you are not 
using it.

I'm looking more at the TRACE_EVENTs in the workqueue (and other places). 
Because having strategically located trace points through out the kernel 
that you can enable all at once, can help analyze the system for issues 
that might be causing problems. You might be thinking you are having 
interrupt issues but enable all events, then you might notice that the 
issues is in the workqueues. Picking your own kprobe locations is not 
going to help in that regard.

In the old -rt patch series, we had trace points scattered all over the 
kernel. This was the original "event tracer". It was low overhead and can 
still give a good overview of the system when the function tracer was too 
much data. Yes, we solved many issues in -rt because of the event tracer. 

Ideally, you want to minimize the trace points so that it does not look 
like a debug session going wild.  I could maintain a set of tracepoints 
out of tree, but it will only be good for me, and not others.

BTW, you work for Google, doesn't google claim to have some magical 
20-some tracepoints that are all they need? Could you give us a hint to 
what and where they are?

-- Steve

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  2:51                                             ` Steven Rostedt
@ 2009-04-25  3:10                                               ` Andrew Morton
  2009-04-25  3:32                                                 ` Steven Rostedt
  0 siblings, 1 reply; 94+ messages in thread
From: Andrew Morton @ 2009-04-25  3:10 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Frederic Weisbecker, zhaolei, Ingo Molnar, kosaki.motohiro,
	tzanussi, LKML, oleg

On Fri, 24 Apr 2009 22:51:03 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:

> In the old -rt patch series, we had trace points scattered all over the 
> kernel. This was the original "event tracer". It was low overhead and can 
> still give a good overview of the system when the function tracer was too 
> much data. Yes, we solved many issues in -rt because of the event tracer. 

Sure, tracers can be useful.  The ext3 tracer I did way back when
maintained a 32-element trace buffer inside each buffer_head and then would
emit that trace when an assertion failed against that buffer_head, so
you can see the last 32 things which happened to that bh.  It would
have been nigh impossible to fix many of the things which were fixed
without that facility.  (I doubt, incidentally, whether ftrace can do
this sort of data-centric tracing?).

But I never merged it into Linux.  Some of the tracepoints are in there
(grep TRACE fs/ext3/*.c) but the core was kept out-of-tree.

> BTW, you work for Google,

Hey, it's the other way round ;)

> doesn't google claim to have some magical 
> 20-some tracepoints that is all they need? Could you give us a hint to 
> what and where they are?

Wouldn't have a clue.  Jiaying Zhang should be able to find out.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  3:10                                               ` Andrew Morton
@ 2009-04-25  3:32                                                 ` Steven Rostedt
  0 siblings, 0 replies; 94+ messages in thread
From: Steven Rostedt @ 2009-04-25  3:32 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, zhaolei, Ingo Molnar, kosaki.motohiro,
	tzanussi, LKML, oleg


On Fri, 24 Apr 2009, Andrew Morton wrote:

> On Fri, 24 Apr 2009 22:51:03 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > In the old -rt patch series, we had trace points scattered all over the 
> > kernel. This was the original "event tracer". It was low overhead and can 
> > still give a good overview of the system when the function tracer was too 
> > much data. Yes, we solved many issues in -rt because of the event tracer. 
> 
> Sure, tracers can be useful.  The ext3 tracer I did way back when
> maintained a 32-element trace buffer inside each buffer_head and then would
> emit that trace when an assertion failed against that buffer_head, so
> you can see the last 32 things which happened to that bh.  It would
> have been nigh impossible to fix many of the things which were fixed
> without that facility.  (I doubt, incidentally, whether ftrace can do
> this sort of data-centric tracing?).

Not currently, but be careful what you say to me. I might implement it ;-)

Remember what happened when Ingo hypothetically asked me if it would be 
possible to trace all branches.

> 
> But I never merged it into Linux.  Some of the tracepoints are in there
> (grep TRACE fs/ext3/*.c) but the core was kept out-of-tree.

 grep TRACE fs/ext3/*.c |wc
     79     308    5010

Some?

This is exactly my point. We must still be frugal about what trace 
points gets into the kernel. But I think places that give a good idea of 
the events that are happening (strategically placed) can help the general 
community.

> 
> > BTW, you work for Google,
> 
> Hey, it's the other way round ;)

lol

> 
> > doesn't google claim to have some magical 
> > 20-some tracepoints that is all they need? Could you give us a hint to 
> > what and where they are?
> 
> Wouldn't have a clue.  Jiaying Zhang should be able to find out.
> 

OK, maybe I'll ping her next week.

Thanks,

-- Steve


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-25  1:28                                       ` Andrew Morton
  2009-04-25  2:00                                         ` Steven Rostedt
@ 2009-04-26 10:47                                         ` Ingo Molnar
  2009-04-27  5:44                                           ` Andrew Morton
  1 sibling, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-26 10:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, zhaolei, kosaki.motohiro, rostedt, tzanussi,
	linux-kernel, oleg


* Andrew Morton <akpm@linux-foundation.org> wrote:

> On Sat, 25 Apr 2009 02:37:03 +0200
> Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > I discovered it with this tracer. Then it brought me to
> > write this patch:
> > 
> > http://lkml.org/lkml/2009/1/31/184
> > 
> > ...
> > 
> > Still with these same observations, I wrote this another one:
> > 
> > http://lkml.org/lkml/2009/1/26/363
> 
> OK, it's great that you're working to improve the workqueue code.  
> But does this justify permanently adding debug code to the core 
> workqueue code? [...]

Andrew - but this is not what you asked originally. Here's the 
exchange, not cropped:

> > > So this latest patchset provides all these required 
> > > informations on the events tracing level.
> >  Well.. required by who?
> >
> > I don't recall ever seeing any problems of this nature, nor 
> > patches to solve any such problems.

And Frederic replied that there's three recent examples of various 
patches and problem reports resulting out of the workqueue 
tracepoints.

Now you argue 'yes, there might have been an advantage but it's not 
permanent' - which appears to be a somewhat shifting position 
really. I don't think _our_ position has shifted in any way - please 
correct me if i'm wrong ;-)

And i'm, as the original author of the kernel/workqueue.c code 
(heck, i even coined the 'workqueue' term - if that matters) agree 
with Frederic here: more transparency in seeing what goes on in a 
subsystem brings certain advantages:

 - it spurs development
 - it helps the fixing of bugs
 - and generally it helps people understand the kernel better

weighed against the cost of maintaining (and seeing) those 
tracepoints.

In the scheduler we have more than 60 distinct points of 
instrumentation.

The patches we are discussing here add 6 new tracepoints to 
kernel/workqueue.c - and i'd argue they are pretty much the maximum 
we'd ever want to have there.

I've been maintaining the scheduler instrumentation for years, and 
its overhead is, in hindsight, rather low - and the advantage is 
significant. As long as tracing and statistics instrumentation has a 
very standard and low-key "function call" visual form, i dont even 
notice them most of the time.

And the thing is, the workqueue code has been pretty problematic 
lately - with lockups and other regressions. It's a pretty 'opaque' 
facility that _hides_ what goes on in it - so more transparency 
might be a good answer just on that basis alone.

> [...]  In fact, because you've discovered these problem, the 
> reasons for adding the debug code have lessened!
> 
> What we need are curious developers looking into how well 
> subsystems are performing and how well callers are using them.  
> Adding fairly large amounts of permanent debug code into the core 
> subsystems is a peculiar way of encouraging such activity.

but this - which you call peculiar - is exactly what happened when 
the first set of tracepoints were added.

Secondly, if we discount the (fairly standard) off-site tracepoints, 
it is not a "large amount of debug code" - the tracepoints are completely 
off site and are not a worry as long as the tracepoint arguments are 
kept intact. The bits in kernel/workqueue.c are on the 26 lines flux 
range:

 workqueue.c             |   26 ++++++++++++++++++++------

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-26 10:47                                         ` Ingo Molnar
@ 2009-04-27  5:44                                           ` Andrew Morton
  2009-04-27 15:02                                             ` Oleg Nesterov
  0 siblings, 1 reply; 94+ messages in thread
From: Andrew Morton @ 2009-04-27  5:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frederic Weisbecker, zhaolei, kosaki.motohiro, rostedt, tzanussi,
	linux-kernel, oleg

On Sun, 26 Apr 2009 12:47:47 +0200 Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> > On Sat, 25 Apr 2009 02:37:03 +0200
> > Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > I discovered it with this tracer. Then it brought me to
> > > write this patch:
> > > 
> > > http://lkml.org/lkml/2009/1/31/184
> > > 
> > > ...
> > > 
> > > Still with these same observations, I wrote this another one:
> > > 
> > > http://lkml.org/lkml/2009/1/26/363
> > 
> > OK, it's great that you're working to improve the workqueue code.  
> > But does this justify permanently adding debug code to the core 
> > workqueue code? [...]
> 
> Andrew - but this is not what you asked originally. Here's the 
> exchange, not cropped:
> 
> > > > So this latest patchset provides all these required 
> > > > informations on the events tracing level.
> > >  Well.. required by who?
> > >
> > > I don't recall ever seeing any problems of this nature, nor 
> > > patches to solve any such problems.
> 
> And Frederic replied that there's three recent examples of various 
> patches and problem reports resulting out of the workqueue 
> tracepoints.
> 
> Now you argue 'yes, there might have been an advantage but it's not 
> permanent' - which appears to be a somewhat shifting position 
> really. I dont think _our_ position has shifted in any way - please 
> correct me if i'm wrong ;-)

My point, as I'm sure you fully understand, is that this feature is
unlikely to see sufficient use as to warrant its inclusion.

> And i'm, as the original author of the kernel/workqueue.c code 
> (heck, i even coined the 'workqueue' term - if that matters)

Most workqueue work lately has come from Oleg.  I'm unaware that he has
expressed an interest in this feature?  Oleg, would it have been useful
in any of the work you've done?

> And the thing is, the workqueue code has been pretty problematic 
> lately - with lockups and other regressions. It's a pretty 'opaque' 
> facility that _hides_ what goes on in it - so more transparency 
> might be a good answer just on that basis alone.

OK, so there's a test case.  How many of the problems which we've
recently found and fixed in the workqueue would have been more easily
found and fixed had this feature been available?

Zero, methinks.

> Secondly, if we discount the (fairly standard) off-site tracepoints, 
> is not "large amount of debug code" - the tracepoints are completely 
> off site and are not a worry as long as the tracepoint arguments are 
> kept intact. The bits in kernel/workqueue.c are on the 26 lines flux 
> range:

Multiplied by gawd-knows-how-many other such subsystems.

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-27  5:44                                           ` Andrew Morton
@ 2009-04-27 15:02                                             ` Oleg Nesterov
  2009-04-27 15:43                                               ` Ingo Molnar
  2009-04-28 13:42                                               ` Frederic Weisbecker
  0 siblings, 2 replies; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-27 15:02 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, Frederic Weisbecker, zhaolei, kosaki.motohiro,
	rostedt, tzanussi, linux-kernel

On 04/26, Andrew Morton wrote:
>
> Most workqueue work lately has come from Oleg.  I'm unaware that he has
> expressed an interest in this feature?  Oleg, would it have been useful
> in any of the work you've done?

Well. Probably not. But I don't think this matters. Other people (and
not only kernel developers) can find this useful.

I _think_ that if you are going to hack workqueue.c itself, it is
more easy to just add some printks, may be I am wrong. But, probably
tracepoints can help to write/debug, say, device drivers. Or admins
can use debugfs to see whats going on, or to provide more info for
the bugreports.

I try to avoid "do we need this feature" discussions as much as
possible. Because I never know. My usage of kernel is very, very
limited, I never do something "interesting" on my machine. This reminds
me the discussion about the ability to trace /sbin/init. Some developers
were unhappy with the trivial patch I sent. They said it is trivial
to change your kernel if you need this. But no, it was not trivial
to me when I was admin. So, I just can't judge.

> > And the thing is, the workqueue code has been pretty problematic
> > lately - with lockups and other regressions.

Hmm. Perhaps I missed some bug-reports... But I don't remember any
recent problems with workqueues except the "usual" bugs like "flush
shares the lock with work->func".


As for the patches, I can't review them now. They are on top of
some other changes which I didn't see (or perhaps I lost the patches
I was cc'ed? sorry in this case).

But at least the change in workqueue.c looks very simple, and does
not complicate the code for readers. And, if we add any tracing
to workqueues, then it is very natural to add entry/exit handlers.


I must admit, I don't really understand why trace_workqueue.c uses
cwq->thread as a "primary key". I have the feeling we can simplify
this code if we pass "struct workqueue_struct *" instead, but I am
not sure.

In particular, trace_workqueue_flush(cwq->thread) looks a bit strange
to me. I can't imagine how it can be useful per-thread and without
trace_workqueue_flush_end() or something. I mean, if we need to trace
flushes, then imho it makes much more sense to add flush_start/flush_end
handlers into flush_workqueue().

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-27 15:02                                             ` Oleg Nesterov
@ 2009-04-27 15:43                                               ` Ingo Molnar
  2009-04-27 19:09                                                 ` Oleg Nesterov
  2009-04-28 13:42                                               ` Frederic Weisbecker
  1 sibling, 1 reply; 94+ messages in thread
From: Ingo Molnar @ 2009-04-27 15:43 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Frederic Weisbecker, zhaolei, kosaki.motohiro,
	rostedt, tzanussi, linux-kernel


* Oleg Nesterov <oleg@redhat.com> wrote:

> On 04/26, Andrew Morton wrote:
> >
> > Most workqueue work lately has come from Oleg.  I'm unaware that 
> > he has expressed an interest in this feature?  Oleg, would it 
> > have been useful in any of the work you've done?
> 
> Well. Probably not. But I don't think this matters. Other people 
> (and not only kernel developers) can find this useful.
> 
> I _think_ that if you are going to hack workqueue.c itself, it is 
> more easy to just add some printks, may be I am wrong. But, 
> probably tracepoints can help to write/debug, say, device drivers. 
> Or admins can use debugfs to see whats going on, or to provide 
> more info for the bugreports.
> 
> I try to avoid "do we need this feauture" discussions as much as 
> possible. Because I never know. My usage of kernel is very, very 
> limited, I never do something "interesting" on my machine. This 
> reminds me the discussion about the ability to trace /sbin/init. 
> Some developers were unhappy with the trivial patch I sent. They 
> said it is trivial to change your kernel if you need this. But no, 
> it was not trivial to me when I was admin. So, I just can't judge.
> 
> > > And the thing is, the workqueue code has been pretty 
> > > problematic lately - with lockups and other regressions.
> 
> Hmm. Perhaps I missed some bug-reports... But I don't remember any 
> recent problems with workueues except the "usual" bugs like "flush 
> shares the lock with work->func".

Sorry - i mean lockups while _using_ workqueues. The workqueue code 
has been pretty OK - sans that thing with work_on_cpu() which was 
quite a pain with 4-5 regressions IIRC. So generally workqueue usage 
isn't particularly well developed - and having some basic 
instrumentation increases mindshare. IMHO.

> As for the patches, I can't review them now. They are on top of 
> some other changes which I didn't see (or perhaps I lost the 
> patches I was cc'ed? sorry in this case).

it's against the tracing tree - which has new/changed facilities 
which these patches are against. I think Frederic can send you an 
URI to a branch in his tree with these bits properly added in, for 
review. (a full patch in that case would be useful too i guess)

> But at least the change in workqueue.c looks very simple, and do 
> not complicate the code for readers. And, if we add any tracing to 
> workqueues, then it is very natural to add entry/exit handlers.

thanks!

> I must admit, I don't really understand why trace_workqueue.c uses 
> cwq->thread as a "primary key". I have the feeling we can simplify 
> this code if we pass "struct workqueue_struct *" instead, but I am 
> not sure.
> 
> In particular, trace_workqueue_flush(cwq->thread) looks a bit 
> strange to me. I can't imagine how it can be useful per-thread and 
> without trace_workqueue_flush_end() or something. I mean, if we 
> need to trace flushes, then imho it makes much more sense to add 
> flush_start/flush_end handlers into flush_workqueue().

If this is fixed (and no other problem surfaces), would you mind to 
ack these bits?

And if you can think of any way to make it even simpler / less 
intrusive, please let us know ...

	Ingo

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-27 15:43                                               ` Ingo Molnar
@ 2009-04-27 19:09                                                 ` Oleg Nesterov
  0 siblings, 0 replies; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-27 19:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Frederic Weisbecker, zhaolei, kosaki.motohiro,
	rostedt, tzanussi, linux-kernel

On 04/27, Ingo Molnar wrote:
>
> * Oleg Nesterov <oleg@redhat.com> wrote:
>
> > I must admit, I don't really understand why trace_workqueue.c uses
> > cwq->thread as a "primary key". I have the feeling we can simplify
> > this code if we pass "struct workqueue_struct *" instead, but I am
> > not sure.
> >
> > In particular, trace_workqueue_flush(cwq->thread) looks a bit
> > strange to me. I can't imagine how it can be useful per-thread and
> > without trace_workqueue_flush_end() or something. I mean, if we
> > need to trace flushes, then imho it makes much more sense to add
> > flush_start/flush_end handlers into flush_workqueue().
>
> If this is fixed (and no other problem surfaces), would you mind to
> ack these bits?

If you think my ack can help, you have it right now ;)

> And if you can think of any way to make it even simpler / less
> intrusive, please let us know ...

Sure. But currently I only have a gut feeling (probably wrong) and nothing
more. And just in case, my apologies to all, I can't be very responsive on
this topic in the near future.

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-27 15:02                                             ` Oleg Nesterov
  2009-04-27 15:43                                               ` Ingo Molnar
@ 2009-04-28 13:42                                               ` Frederic Weisbecker
  2009-04-28 16:43                                                 ` Oleg Nesterov
  1 sibling, 1 reply; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-28 13:42 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Ingo Molnar, zhaolei, kosaki.motohiro, rostedt,
	tzanussi, linux-kernel

On Mon, Apr 27, 2009 at 05:02:41PM +0200, Oleg Nesterov wrote:
> On 04/26, Andrew Morton wrote:
> >
> > Most workqueue work lately has come from Oleg.  I'm unaware that he has
> > expressed an interest in this feature?  Oleg, would it have been useful
> > in any of the work you've done?
> 
> Well. Probably not. But I don't think this matters. Other people (and
> not only kernel developers) can find this useful.
> 
> I _think_ that if you are going to hack workqueue.c itself, it is
> more easy to just add some printks, may be I am wrong. But, probably
> tracepoints can help to write/debug, say, device drivers. Or admins
> can use debugfs to see whats going on, or to provide more info for
> the bugreports.
> 
> I try to avoid "do we need this feauture" discussions as much as
> possible. Because I never know. My usage of kernel is very, very
> limited, I never do something "interesting" on my machine. This reminds
> me the discussion about the ability to trace /sbin/init. Some developers
> were unhappy with the trivial patch I sent. They said it is trivial
> to change your kernel if you need this. But no, it was not trivial
> to me when I was admin. So, I just can't judge.
> 
> > > And the thing is, the workqueue code has been pretty problematic
> > > lately - with lockups and other regressions.
> 
> Hmm. Perhaps I missed some bug-reports... But I don't remember any
> recent problems with workueues except the "usual" bugs like "flush
> shares the lock with work->func".
> 
> 
> As for the patches, I can't review them now. They are on top of
> some other changes which I didn't see (or perhaps I lost the patches
> I was cc'ed? sorry in this case).


I'm currently gathering Zhaolei patches and I will push them all in
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
 tracing/workqueue

Once it's done, I will also push the fixes that address some of your
comments from your last review.

 
> But at least the change in workqueue.c looks very simple, and do
> not complicate the code for readers. And, if we add any tracing
> to workqueues, then it is very natural to add entry/exit handlers.
> 
> 
> I must admit, I don't really understand why trace_workqueue.c uses
> cwq->thread as a "primary key". I have the feeling we can simplify
> this code if we pass "struct workqueue_struct *" instead, but I am
> not sure.



Indeed, I wanted to use it as the identifier first. The problem
is that this structure is privately defined inside kernel/workqueue.c
And because struct cpu_workqueue * and struct cpu_workqueue::thread
is a 1:1 relation, I've used the thread as the real identifier, so
that I can access the thread_comm and other things.

But actually it's not really a 1:1 match in the CONFIG_HOTPLUG_CPU
case, because the thread can be destroyed and the cpu_workqueue assigned
a new one later.

I managed it by also destroying the workqueue in that case on the stat.


> In particular, trace_workqueue_flush(cwq->thread) looks a bit strange
> to me. I can't imagine how it can be useful per-thread and without
> trace_workqueue_flush_end() or something. I mean, if we need to trace
> flushes, then imho it makes much more sense to add flush_start/flush_end
> handlers into flush_workqueue().


True. The flush would make more sense if it's embraced with a begin/end.
The result would be:

wq_name flush_begin
 cpu_workqueue1
	worklet1 exec
	worklet2 exec
 cpu_workqueue2
	worklet3 exec
	worklet4 exec
 ...
wq_name flush_end

So that we can more easily eye-parse this section and have the
execution time of the whole wait for completion.


> Oleg.
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-28 13:42                                               ` Frederic Weisbecker
@ 2009-04-28 16:43                                                 ` Oleg Nesterov
  2009-04-28 17:49                                                   ` Frederic Weisbecker
  0 siblings, 1 reply; 94+ messages in thread
From: Oleg Nesterov @ 2009-04-28 16:43 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Andrew Morton, Ingo Molnar, zhaolei, kosaki.motohiro, rostedt,
	tzanussi, linux-kernel

On 04/28, Frederic Weisbecker wrote:
>
> On Mon, Apr 27, 2009 at 05:02:41PM +0200, Oleg Nesterov wrote:
> >
> > I must admit, I don't really understand why trace_workqueue.c uses
> > cwq->thread as a "primary key". I have the feeling we can simplify
> > this code if we pass "struct workqueue_struct *" instead, but I am
> > not sure.
>
> Indeed, I wanted to use it as the identifier first. The problem
> is that this structure is privately defined inside kernel/workqueue.c

Perhaps we have to export some bits or add some simple helpers to
workqueue.c. But I am not sure trace/trace_workqueue.c actually needs
any additional info. Again, we can use "struct workqueue_struct *" as
a "void *", and probe_workqueue_creation() can use alloc_percpu() to
record the events per-cpu. (_Perhaps_, we can also add some fields
to workqueue_struct or cpu_workqueue_struct under #ifdef and eliminate
the list search code in trace_workqueue.c).

> But actually it's not really a 1:1 matching in CONFIG_HOTPLUG_CPU
> case, because the thread can destroyed and the cpu_workqueue assigned
> with a new one later.

Indeed, I also thought about this. This is subjective, but perhaps it
is better to keep the history. If not, we can clear it on CPU_DEAD.



But let me repeat, I am not very sure these changes are really good,
and I didn't try to really think about them. Even _if_ I am right
we can do this later.


> I'm currently gathering Zhaolei patches and I will push them all in
> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
>  tracing/workqueue

Thanks! Will try to look when I have the chance. Not that I think
I can really help though ;)

Oleg.


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-24 20:06                               ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Andrew Morton
  2009-04-24 22:59                                 ` Frederic Weisbecker
@ 2009-04-28 17:24                                 ` Frank Ch. Eigler
  2009-04-28 18:48                                   ` Andrew Morton
  1 sibling, 1 reply; 94+ messages in thread
From: Frank Ch. Eigler @ 2009-04-28 17:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Zhaolei, mingo, kosaki.motohiro, fweisbec, rostedt, tzanussi,
	linux-kernel, oleg

Andrew Morton <akpm@linux-foundation.org> writes:

> [...]  The patches introduce a moderate amount of tracing-specific
> hooks into the core workqueue code, which inevitably increases the
> maintenance load for that code.  It is important that it be
> demonstrated that the benefts of the code are worth that cost.
> Hence it is important that these benefits be demonstrated to us, by
> yourself.  Please.  [...]

To be fair, if you ask for someone to specify and quantify the
"benefits" side of this, shouldn't someone specify and quantify the
"cost" side of this too?  That way we would have something to compare.


- FChE

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-28 16:43                                                 ` Oleg Nesterov
@ 2009-04-28 17:49                                                   ` Frederic Weisbecker
  0 siblings, 0 replies; 94+ messages in thread
From: Frederic Weisbecker @ 2009-04-28 17:49 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Ingo Molnar, zhaolei, kosaki.motohiro, rostedt,
	tzanussi, linux-kernel

On Tue, Apr 28, 2009 at 06:43:42PM +0200, Oleg Nesterov wrote:
> On 04/28, Frederic Weisbecker wrote:
> >
> > On Mon, Apr 27, 2009 at 05:02:41PM +0200, Oleg Nesterov wrote:
> > >
> > > I must admit, I don't really understand why trace_workqueue.c uses
> > > cwq->thread as a "primary key". I have the feeling we can simplify
> > > this code if we pass "struct workqueue_struct *" instead, but I am
> > > not sure.
> >
> > Indeed, I wanted to use it as the identifier first. The problem
> > is that this structure is privately defined inside kernel/workqueue.c
> 
> Perhaps we have to export some bits or add some simple helpers to
> workqueue.c. But I am not sure trace/trace_workqueue.c actually needs
> any additional info. Again, we can use "struct workqueue_struct *" as
> a "void *", and probe_workqueue_creation() can use alloc_percpu() to
> record the events per-cpu. (_Perhaps_, we can also add some fields
> to workqueue_struct or cpu_workqueue_struct under #ifdef and eliminate
> the list search code in trace_workqueue.c).



Hmm. Yes, basing this tracing against the workqueue as a unit basis
using per_cpu buffers seems rather more natural.


 
> > But actually it's not really a 1:1 matching in CONFIG_HOTPLUG_CPU
> > case, because the thread can destroyed and the cpu_workqueue assigned
> > with a new one later.
> 
> Indeed, I also thought about this. This is subjective, but perhaps it
> is better to keep the history. If not, we can clear it on CPU_DEAD.
> 



Yeah, it means we should separate cleanup and destroy events.


> 
> But let me repeat, I am not very sure these changes are really good,
> and I didn't try to really think about them. Even _if_ I am right
> we can do this later.
> 


The changes you suggest seem to me logical and could simplify this
tracer.
So I will apply Zhaolei patches and then start addressing
your comments :-)


> > I'm currently gathering Zhaolei patches and I will push them all in
> > git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git
> >  tracing/workqueue
> 
> Thanks! Will try to look when I have the chance. Not that I think
> I can really help though ;)


Thanks!


> Oleg.
> 


^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-28 17:24                                 ` Frank Ch. Eigler
@ 2009-04-28 18:48                                   ` Andrew Morton
  2009-04-28 20:51                                     ` Frank Ch. Eigler
  2009-04-29  4:03                                     ` KOSAKI Motohiro
  0 siblings, 2 replies; 94+ messages in thread
From: Andrew Morton @ 2009-04-28 18:48 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: zhaolei, mingo, kosaki.motohiro, fweisbec, rostedt, tzanussi,
	linux-kernel, oleg

On Tue, 28 Apr 2009 13:24:57 -0400
fche@redhat.com (Frank Ch. Eigler) wrote:

> Andrew Morton <akpm@linux-foundation.org> writes:
> 
> > [...]  The patches introduce a moderate amount of tracing-specific
> > hooks into the core workqueue code, which inevitably increases the
> > maintenance load for that code.  It is important that it be
> > demonstrated that the benefts of the code are worth that cost.
> > Hence it is important that these benefits be demonstrated to us, by
> > yourself.  Please.  [...]
> 
> To be fair, if you ask for someone to specify and quantify the
> "benefits" side of this, shouldn't someone specify and quantify the
> "cost" side of this too?

Not if benefit==0 ;)

(Which I suspect will be close to the truth)

>  That way we would have something to compare.

Well, there are two aspects to this.  There's the immediate up-front
cost - a little added complexity, more code, etc.

But there's also the where-the-hell-is-this-all-going question.  Are we
going to end up with hundreds or thousands of tracepoints sprinkled all
over core kernel?  If so, what use are they?  What userspace tools will
be used to pull them into something useful?  Who will be developing
those tools and are they involved in the development of the
tracepoints?  Will this proliferation of static tracepoints actively
kill off the development and maturation of dynamic tracepoints?  If so,
is that good?


>From where I sit it looks like a mad scramble to sprinkle fairly random
tracepoints all over the place under the assumption that
this-may-be-useful-to-someone-one-day.  But of course, that means that
they many never be useful to anyone ever.

Part of this derives from the development approach.  With the various
tracers which we've developed in the past, the problem has been "I'm
working on subsystem X and I need help".  So the tracer is driven by a
particular development problem (original blktrace, ext3-debug, etc). 
This will axiomatically result in a tracer which is _useful_.

But in this case the approach is different - the problem statement is
"I need to add tracepoints to subsystem X".  It's not driven by any
particular development problem.  So there's no guarantee at all that the
end result will be _useful_ for anything!






^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-28 18:48                                   ` Andrew Morton
@ 2009-04-28 20:51                                     ` Frank Ch. Eigler
  2009-04-29  4:03                                     ` KOSAKI Motohiro
  1 sibling, 0 replies; 94+ messages in thread
From: Frank Ch. Eigler @ 2009-04-28 20:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: zhaolei, mingo, kosaki.motohiro, fweisbec, rostedt, tzanussi,
	linux-kernel, oleg

Hi, Andrew -

On Tue, Apr 28, 2009 at 11:48:27AM -0700, Andrew Morton wrote:
> [...]
> > To be fair, if you ask for someone to specify and quantify the
> > "benefits" side of this, shouldn't someone specify and quantify the
> > "cost" side of this too?
> [...]
> >  That way we would have something to compare.
> 
> Well, there are two aspects to this.  There's the immediate up-front
> cost - a little added complexity, more code, etc.

Yes, a little.


> But there's also the where-the-hell-is-this-all-going question.  Are we
> going to end up with hundreds or thousands of tracepoints sprinkled all
> over core kernel?  If so, what use are they?  What userspace tools will
> be used to pull them into something useful?  Who will be developing
> those tools and are they involved in the development of the
> tracepoints?  

(Well, that's back to the "benefits" side, isn't it?)


> Will this proliferation of static tracepoints actively kill off the
> development and maturation of dynamic tracepoints?  If so, is that
> good?

I hope not, but I believe the consensus has been that both dynamic and
static instrumentation are necessary & useful.


> From where I sit it looks like a mad scramble to sprinkle fairly
> random tracepoints all over the place under the assumption that
> this-may-be-useful-to-someone-one-day.  But of course, that means
> that they many never be useful to anyone ever. [...]

You may not be giving enough credit to the subsystem developers.  If
they opine that those tracepoints may accomplish (or have already
accomplished) something forseeably useful, at a reasonably "little"
cost, should it not require a large burden of proof to overcome their
judgement?


- FChE

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-28 18:48                                   ` Andrew Morton
  2009-04-28 20:51                                     ` Frank Ch. Eigler
@ 2009-04-29  4:03                                     ` KOSAKI Motohiro
  2009-04-29  4:29                                       ` Andrew Morton
  1 sibling, 1 reply; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-29  4:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kosaki.motohiro, Frank Ch. Eigler, zhaolei, mingo, fweisbec,
	rostedt, tzanussi, linux-kernel, oleg

> On Tue, 28 Apr 2009 13:24:57 -0400
> fche@redhat.com (Frank Ch. Eigler) wrote:
> 
> > Andrew Morton <akpm@linux-foundation.org> writes:
> > 
> > > [...]  The patches introduce a moderate amount of tracing-specific
> > > hooks into the core workqueue code, which inevitably increases the
> > > maintenance load for that code.  It is important that it be
> > > demonstrated that the benefts of the code are worth that cost.
> > > Hence it is important that these benefits be demonstrated to us, by
> > > yourself.  Please.  [...]
> > 
> > To be fair, if you ask for someone to specify and quantify the
> > "benefits" side of this, shouldn't someone specify and quantify the
> > "cost" side of this too?
> 
> Not if benefit==0 ;)
> 
> (Which I suspect will be close to the truth)
>
> 
> >  That way we would have something to compare.
> 
> Well, there are two aspects to this.  There's the immediate up-front
> cost - a little added complexity, more code, etc.
> 
> But there's also the where-the-hell-is-this-all-going question.  Are we
> going to end up with hundreds or thousands of tracepoints sprinkled all
> over core kernel?  If so, what use are they?  What userspace tools will
> be used to pull them into something useful?  Who will be developing
> those tools and are they involved in the development of the
> tracepoints?  Will this proliferation of static tracepoints actively
> kill off the development and maturation of dynamic tracepoints?  If so,
> is that good?
> 
> 
> From where I sit it looks like a mad scramble to sprinkle fairly random
> tracepoints all over the place under the assumption that
> this-may-be-useful-to-someone-one-day.  But of course, that means that
> they many never be useful to anyone ever.
> 
> Part of this derives from the development approach.  With the various
> tracers which we've developed in the past, the problem has been "I'm
> working on subsystem X and I need help".  So the tracer is driven by a
> particular development problem (original blktrace, ext3-debug, etc). 
> This will axiomatically result in a tracer which is _useful_.
> 
> But in this case the approach is different - the problem statement is
> "I need to add tracepoints to subsystem X".  It's not driven by any
> particular development problem.  So there's no guarantee at all that the
> end result will be _useful_ for anything!

May I explain my opinion? I am the original author of the workqueue tracer
latency-enhancement patch.

In the real world, desktop and server users run various out-of-tree drivers and kernel
modules (e.g. some graphics drivers, DRBD, proprietary security software, et al.),
and poor-quality drivers often have bugs in asynchronous processing paths
(e.g. timer, workqueue, irq).

Such a bug may not be easy to fix or analyze. If a kernel oops happens,
it's easy: the oops log points out the suspect clearly in almost all cases.
But if a poor driver causes large latency, the evidence has often vanished
by the time the latency is observed.

When trouble happens, an administrator is left baffled: "Oh, which software
is at fault? How do I separate the good software from the bad?"
In the past, we always said: "Hehe, you use a proprietary module; can you
reproduce this bug with the upstream kernel only?" This answer neither helps
nor solves anything for the end user; it is an evasion of accountability.

Good, well-defined static tracepoints help this situation greatly.


In addition, as far as I know, typical DTrace users don't use the dynamic
tracing feature at all.
They don't know how to choose proper probe points for dynamic tracing.
They repeatedly and strongly hope for more well-defined static probe points; they
think the dynamic tracing feature is too hard to use.

I also strongly dislike randomly placed tracepoints, but I don't think this patch
series is random,
and I think other asynchronous processing subsystems need static tracepoints as well.




^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-29  4:03                                     ` KOSAKI Motohiro
@ 2009-04-29  4:29                                       ` Andrew Morton
  2009-04-29  5:21                                         ` KOSAKI Motohiro
  0 siblings, 1 reply; 94+ messages in thread
From: Andrew Morton @ 2009-04-29  4:29 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Frank Ch. Eigler, zhaolei, mingo, fweisbec, rostedt, tzanussi,
	linux-kernel, oleg

On Wed, 29 Apr 2009 13:03:51 +0900 (JST) KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> > But in this case the approach is different - the problem statement is
> > "I need to add tracepoints to subsystem X".  It's not driven by any
> > particular development problem.  So there's no guarantee at all that the
> > end result will be _useful_ for anything!
> 
> May I explain my opinion? I am original patch author of latency enhancement of
> workqueue tracer.
> 
> In real world, desktop and server user use various the out of tree driver and kernel
> module (e.g. some graphics driver, DRBD, proprietery security software et.al).
> and poor quality driver often make bug in asynchronous processing place
> (e.g. timer, workqueue, irq).
> 
> the bug may not be easy fixable and analyzable. if kernel oops happend,
> it's easy. oops log point out suspector clearly in almost case.
> but if the poor driver makes large latency, the evidence often vanished
> before latency occured.
> 
> When trouble happend, An administrator get large bafflement. "Oh, which software
> is wrong? how do I divide good and wrong software?".
> In past days, We always say "hehe, you use proprietery module. can you 
> reproduce this bug on upstream kernel only?". this answer don't help
> nor solve end-user. it is one of escape of accountability.
> 
> The good well defined static tracepoint help its situation largely.
> 
> 
> In addition, As far as I know, typical DTrace user don't use dynamic 
> tracing feature at all.
> They think they don't know how choice proper probe point for dynamic tracing.
> They repeatedly strongly hope to increase well defined static probe point. they
> think dynamic trace feature is too hard to use.
> 
> I also strongly dislike random place tracepoint. but I don't think this patch
> series is random.
> and I think other asynchronous processing subsystem need static tracepoint.

OK.

It's quite unclear to me how we get from here to a situation where we
have something which your administrator can use.  Hopefully someone
some day will pull all this together into an overall integrated
toolset.  The fact that the kernel work is being done (afaict)
waaaaaaaay in advance of that development means that we'll probably
have to revisit the kernel work.  So be it.

But your administrator wouldn't even know to go looking at workqueues! 
Unless the userspace support tools are very very good.  He might
instead spend hours poking at the sleep-tracer or the rwsem-tracer or
the slow-work-tracer or blah blah.

I expect that a generic function-entry tracer (which we already have)
would be the starting tool for diagnosing such a problem.  Probably it
would be the ending tool too.

What's the terminal state here?  The end result?  I see lots of random
somewhat-useless-looking tracers being added, but what are we actually
working towards?

Until we know that, how do we know that we're not adding stuff
which we won't need (as I suspect we are doing)?

^ permalink raw reply	[flat|nested] 94+ messages in thread

* Re: [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing
  2009-04-29  4:29                                       ` Andrew Morton
@ 2009-04-29  5:21                                         ` KOSAKI Motohiro
  0 siblings, 0 replies; 94+ messages in thread
From: KOSAKI Motohiro @ 2009-04-29  5:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kosaki.motohiro, Frank Ch. Eigler, zhaolei, mingo, fweisbec,
	rostedt, tzanussi, linux-kernel, oleg

> On Wed, 29 Apr 2009 13:03:51 +0900 (JST) KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> 
> > > But in this case the approach is different - the problem statement is
> > > "I need to add tracepoints to subsystem X".  It's not driven by any
> > > particular development problem.  So there's no guarantee at all that the
> > > end result will be _useful_ for anything!
> > 
> > May I explain my opinion? I am original patch author of latency enhancement of
> > workqueue tracer.
> > 
> > In real world, desktop and server user use various the out of tree driver and kernel
> > module (e.g. some graphics driver, DRBD, proprietery security software et.al).
> > and poor quality driver often make bug in asynchronous processing place
> > (e.g. timer, workqueue, irq).
> > 
> > the bug may not be easy fixable and analyzable. if kernel oops happend,
> > it's easy. oops log point out suspector clearly in almost case.
> > but if the poor driver makes large latency, the evidence often vanished
> > before latency occured.
> > 
> > When trouble happend, An administrator get large bafflement. "Oh, which software
> > is wrong? how do I divide good and wrong software?".
> > In past days, We always say "hehe, you use proprietery module. can you 
> > reproduce this bug on upstream kernel only?". this answer don't help
> > nor solve end-user. it is one of escape of accountability.
> > 
> > The good well defined static tracepoint help its situation largely.
> > 
> > 
> > In addition, As far as I know, typical DTrace user don't use dynamic 
> > tracing feature at all.
> > They think they don't know how choice proper probe point for dynamic tracing.
> > They repeatedly strongly hope to increase well defined static probe point. they
> > think dynamic trace feature is too hard to use.
> > 
> > I also strongly dislike random place tracepoint. but I don't think this patch
> > series is random.
> > and I think other asynchronous processing subsystem need static tracepoint.
> 
> OK.
> 
> It's quite unclear to me how we get from here to a situation where we
> have something which your administrator can use.  Hopefully someone
> some day will pull all this together into an overall integrated
> toolset.  The fact that the kernel work is being done (afaict)
> waaaaaaaay in advance of that development means that we'll probably
> have to revist the kernel work.  So be it.
> 
> But your administrator wouldn't even know to go looking at workqueues! 
> Unless the userspace support tools are very very good.  He might
> instead spend hours poking at the sleep-tracer or the rwsem-tracer or
> the slow-work-tracer or blah blah.

Agreed. I have therefore added a latency list display to the workqueue tracer
statistics. It shows which workqueues are suspect (and which need not be doubted).

My expected use-case is:
well-organized statistics narrow down the suspects, and then the most detailed
information can be obtained from the event tracer.

IOW, generally I agree with you: good userspace tools are very, very important.
But for the workqueue tracer's latency view, I think the current statistics are enough.


> I expect that a generic function-entry tracer (which we already have)
> would be the starting tool for diagnosing such a problem.  Probably it
> would be the ending tool too.

I think the function tracer is a control-flow-oriented tracer, while the event
tracer is a data-flow-oriented tracer.
They look at different aspects of the same thing.

For example, if a big latency is caused by a complex, tangled locking dependency,
control-flow analysis won't solve the issue; we need data-flow analysis.




> What's the terminal state here?  The end result?  I see lots of random
> somewhat-useless-looking tracers being added, but what are we actually
> working towards?
> 
> Until we know that, how do we know that we're not adding stuff
> which we won't need (as I suspect we are doing)?

Oh well. I can't speak for the goal of the ftrace framework itself.

I will only say this:
many issues and troubles in the real world are not problems in the kernel
itself, but our hands are not always clean; there are various half-gray
issues.
In past days we always said "it isn't a kernel bug, it's userland misuse",
but I don't think that is a real solution — it makes for a game of cat and mouse.

Strong analysis-assisting features seem like the right way to improve Linux
quality from the end-user's point of view (not the kernel developer's).

Fortunately, I'm working for a big server vendor now, so I can see past
trouble logs, and I also have some userland development experience.
I therefore think I know some frequently troubled places better than a pure
kernel developer does, and I can help the discussion of "which are the proper
places for well-known tracepoints".

But that's only what I think. We need a good discussion.




^ permalink raw reply	[flat|nested] 94+ messages in thread

end of thread, other threads:[~2009-04-29  5:21 UTC | newest]

Thread overview: 94+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-13  0:53 [PATCH 0/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
2009-04-13  0:54 ` [PATCH 1/1] " Zhaolei
2009-04-13  1:44   ` KOSAKI Motohiro
2009-04-13  3:25     ` KOSAKI Motohiro
2009-04-13  3:49       ` Zhaolei
2009-04-13  3:57         ` Ingo Molnar
2009-04-13  4:07           ` Re: [PATCH 1/1] tracing, workqueuetrace: Make workqueuetracepoints " Zhaolei
2009-04-13  3:57         ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints " KOSAKI Motohiro
2009-04-13  5:52           ` [PATCH v2 1/4] ftrace, " KOSAKI Motohiro
2009-04-13  5:53             ` [PATCH v2 2/4] ftrace: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry KOSAKI Motohiro
2009-04-13 16:25               ` Frederic Weisbecker
2009-04-15 10:22                 ` Oleg Nesterov
2009-04-15 11:09                   ` Ingo Molnar
2009-04-15 11:33                     ` Oleg Nesterov
2009-04-15 11:45                       ` Ingo Molnar
2009-04-15 12:05                         ` Oleg Nesterov
2009-04-13  5:53             ` [PATCH v2 3/4] ftrace: add max execution time mesurement to workqueue tracer KOSAKI Motohiro
2009-04-13 16:16               ` Frederic Weisbecker
2009-04-13 21:21                 ` Ingo Molnar
2009-04-14  1:43                 ` KOSAKI Motohiro
2009-04-14 11:40                   ` Frederic Weisbecker
2009-04-15  0:31                     ` KOSAKI Motohiro
2009-04-13  5:55             ` [PATCH v2 4/4] ftrace: add latecy mesurement feature " KOSAKI Motohiro
2009-04-14  2:50               ` KOSAKI Motohiro
2009-04-13 15:24             ` [PATCH v2 1/4] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
2009-04-14  4:03             ` [PATCH v2 5/4] ftrace, workqueuetrace: display work name KOSAKI Motohiro
2009-04-14 21:16               ` Frederic Weisbecker
2009-04-14 23:55                 ` KOSAKI Motohiro
2009-04-15  1:15                   ` Frederic Weisbecker
2009-04-15  6:13                     ` KOSAKI Motohiro
2009-04-15  6:17                       ` Zhaolei
2009-04-15  9:44                       ` Ingo Molnar
2009-04-15 16:23                       ` Frederic Weisbecker
2009-04-17  6:56                       ` [PATCH v3 0/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Zhaolei
2009-04-17  7:15                         ` [PATCH v3 1/1] " Zhaolei
2009-04-17 13:45                           ` Ingo Molnar
2009-04-20  1:30                             ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueuetracepoints " Zhaolei
2009-04-20  1:38                               ` KOSAKI Motohiro
2009-04-20  1:43                                 ` Zhaolei
2009-04-20  1:49                                   ` KOSAKI Motohiro
2009-04-20  8:46                                     ` Ingo Molnar
2009-04-20 22:25                                       ` Oleg Nesterov
2009-04-20 23:48                                         ` Frederic Weisbecker
2009-04-21 15:28                                           ` Oleg Nesterov
2009-04-21 15:50                                             ` Oleg Nesterov
2009-04-21 18:33                                               ` Frederic Weisbecker
2009-04-21 18:28                                             ` Frederic Weisbecker
2009-04-21 19:37                                               ` Oleg Nesterov
2009-04-24 11:42                             ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Zhaolei
2009-04-24 11:43                               ` [PATCH 1/4] workqueue_tracepoint: introduce workqueue_handler_exit tracepoint and rename workqueue_execution to workqueue_handler_entry Zhaolei
2009-04-24 11:45                               ` [PATCH 2/4] workqueue_tracepoint: Add workqueue_flush and worklet_cancel tracepoint Zhaolei
2009-04-24 11:45                               ` [PATCH 3/4] workqueue_tracepoint: Change tracepoint name to fit worklet and workqueue lifecycle Zhaolei
2009-04-24 11:46                               ` [PATCH 4/4] workqueue_trace: Separate worklet_insertion into worklet_enqueue and worklet_enqueue_delayed Zhaolei
2009-04-24 20:06                               ` [PATCH 0/4] workqueue_tracepoint: Add worklet tracepoints for worklet lifecycle tracing Andrew Morton
2009-04-24 22:59                                 ` Frederic Weisbecker
2009-04-24 23:20                                   ` Andrew Morton
2009-04-25  0:37                                     ` Frederic Weisbecker
2009-04-25  1:28                                       ` Andrew Morton
2009-04-25  2:00                                         ` Steven Rostedt
2009-04-25  2:24                                           ` Andrew Morton
2009-04-25  2:51                                             ` Steven Rostedt
2009-04-25  3:10                                               ` Andrew Morton
2009-04-25  3:32                                                 ` Steven Rostedt
2009-04-26 10:47                                         ` Ingo Molnar
2009-04-27  5:44                                           ` Andrew Morton
2009-04-27 15:02                                             ` Oleg Nesterov
2009-04-27 15:43                                               ` Ingo Molnar
2009-04-27 19:09                                                 ` Oleg Nesterov
2009-04-28 13:42                                               ` Frederic Weisbecker
2009-04-28 16:43                                                 ` Oleg Nesterov
2009-04-28 17:49                                                   ` Frederic Weisbecker
2009-04-24 23:27                                   ` Frederic Weisbecker
2009-04-28 17:24                                 ` Frank Ch. Eigler
2009-04-28 18:48                                   ` Andrew Morton
2009-04-28 20:51                                     ` Frank Ch. Eigler
2009-04-29  4:03                                     ` KOSAKI Motohiro
2009-04-29  4:29                                       ` Andrew Morton
2009-04-29  5:21                                         ` KOSAKI Motohiro
2009-04-20 17:11                           ` [PATCH v3 1/1] ftrace, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
2009-04-21  1:20                             ` KOSAKI Motohiro
2009-04-20  6:42                       ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Zhaolei
2009-04-20  6:58                         ` [PATCH 1/4] trace_workqueue: use list_for_each_entry() instead of list_for_each_entry_safe() Zhaolei
2009-04-20  7:26                           ` Frederic Weisbecker
2009-04-20  6:59                         ` [PATCH 2/4] trace_workqueue: Remove cpu_workqueue_stats->first_entry Zhaolei
2009-04-20  7:02                         ` [PATCH 3/4] trace_workqueue: Remove blank line between each cpu Zhaolei
2009-04-20  7:09                         ` [PATCH 4/4] trace_workqueue: Add worklet information Zhaolei
2009-04-20 11:36                           ` Frederic Weisbecker
2009-04-21  1:57                             ` Zhaolei
2009-04-21 23:39                               ` Frederic Weisbecker
2009-04-20  7:23                         ` [PATCH 0/4] ftrace, workqueuetrace: Add worklet informationame Frederic Weisbecker
2009-04-13 14:34     ` [PATCH 1/1] tracing, workqueuetrace: Make workqueue tracepoints use TRACE_EVENT macro Frederic Weisbecker
2009-04-13 14:13   ` Frederic Weisbecker
2009-04-15  6:47     ` KOSAKI Motohiro
2009-04-13 13:57 ` [PATCH 0/1] " Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).