All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Sleep Profiling v3
@ 2011-12-22  0:15 Arun Sharma
  2011-12-22  0:15 ` [PATCH] tracing, sched: Add a new tracepoint for sleeptime Arun Sharma
  0 siblings, 1 reply; 3+ messages in thread
From: Arun Sharma @ 2011-12-22  0:15 UTC (permalink / raw)
  To: linux-kernel; +Cc: Arun Sharma

Changes since v2:

* Zero {block,sleep}_start after reading them
* Fix trailing whitespace

Changes since v1:

* Define a new sched_stat tracepoint
* Rebased to -tip

When debugging latency, users want to know where their code
is executing on the CPU (cycle profiling) as well as where
it's waiting sleeping or waiting for IO. The following patch set
tries to address the latter.

Normal users can use this tracepoint without root privileges
and on a system with lots of context switches, load can be
reduced by filtering out uninteresting switches.

Sample command lines:

perf record -gPe sched:sched_stat_sleeptime --filter "sleeptime > 10000" -- ./test
perf report --stdio -g graph -G

Arun Sharma (1):
  tracing, sched: Add a new tracepoint for sleeptime

 include/trace/events/sched.h |   50 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |    1 +
 kernel/sched/fair.c          |    2 -
 3 files changed, 51 insertions(+), 2 deletions(-)

-- 
1.7.4


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] tracing, sched: Add a new tracepoint for sleeptime
  2011-12-22  0:15 [PATCH] Sleep Profiling v3 Arun Sharma
@ 2011-12-22  0:15 ` Arun Sharma
  2011-12-24  7:31   ` [tip:sched/core] sched/tracing: " tip-bot for Arun Sharma
  0 siblings, 1 reply; 3+ messages in thread
From: Arun Sharma @ 2011-12-22  0:15 UTC (permalink / raw)
  To: linux-kernel
  Cc: Arun Sharma, Peter Zijlstra, Steven Rostedt, Mathieu Desnoyers,
	Arnaldo Carvalho de Melo, Andrew Vagin, Frederic Weisbecker,
	Ingo Molnar

If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma <asharma@fb.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org
---
 include/trace/events/sched.h |   50 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |    1 +
 kernel/sched/fair.c          |    2 -
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 959ff18..3442c6d 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -363,6 +363,56 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+	u64 block, sleep;
+
+	block = tsk->se.statistics.block_start;
+	sleep = tsk->se.statistics.sleep_start;
+	tsk->se.statistics.block_start = 0;
+	tsk->se.statistics.sleep_start = 0;
+
+	return block ? block : sleep ? sleep : 0;
+#else
+	return 0;
+#endif
+}
+#endif
+
+/*
+ * Tracepoint for accounting sleeptime (time the task is sleeping
+ * or waiting for I/O).
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+	TP_PROTO(struct task_struct *tsk, u64 now),
+
+	TP_ARGS(tsk, now),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	sleeptime		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->sleeptime = trace_get_sleeptime(tsk);
+		__entry->sleeptime = __entry->sleeptime ?
+				now - __entry->sleeptime : 0;
+	)
+	TP_perf_assign(
+		__perf_count(__entry->sleeptime);
+	),
+
+	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->sleeptime)
+);
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca8fd44..172e6ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1899,6 +1899,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
+	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cd3b642..86deb3b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
-		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
-		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
-- 
1.7.4


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [tip:sched/core] sched/tracing: Add a new tracepoint for sleeptime
  2011-12-22  0:15 ` [PATCH] tracing, sched: Add a new tracepoint for sleeptime Arun Sharma
@ 2011-12-24  7:31   ` tip-bot for Arun Sharma
  0 siblings, 0 replies; 3+ messages in thread
From: tip-bot for Arun Sharma @ 2011-12-24  7:31 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, mathieu.desnoyers, a.p.zijlstra,
	avagin, fweisbec, rostedt, acme, tglx, mingo, asharma

Commit-ID:  1ac9bc6943edf7d181b4b1cc734981350d4f6bae
Gitweb:     http://git.kernel.org/tip/1ac9bc6943edf7d181b4b1cc734981350d4f6bae
Author:     Arun Sharma <asharma@fb.com>
AuthorDate: Wed, 21 Dec 2011 16:15:40 -0800
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 23 Dec 2011 17:56:17 +0100

sched/tracing: Add a new tracepoint for sleeptime

If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

This will be useful for sleep time profiling.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma <asharma@fb.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/sched.h |   50 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |    1 +
 kernel/sched/fair.c          |    2 -
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e33ed1b..6ba596b 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+	u64 block, sleep;
+
+	block = tsk->se.statistics.block_start;
+	sleep = tsk->se.statistics.sleep_start;
+	tsk->se.statistics.block_start = 0;
+	tsk->se.statistics.sleep_start = 0;
+
+	return block ? block : sleep ? sleep : 0;
+#else
+	return 0;
+#endif
+}
+#endif
+
+/*
+ * Tracepoint for accounting sleeptime (time the task is sleeping
+ * or waiting for I/O).
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+	TP_PROTO(struct task_struct *tsk, u64 now),
+
+	TP_ARGS(tsk, now),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	sleeptime		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->sleeptime = trace_get_sleeptime(tsk);
+		__entry->sleeptime = __entry->sleeptime ?
+				now - __entry->sleeptime : 0;
+	)
+	TP_perf_assign(
+		__perf_count(__entry->sleeptime);
+	),
+
+	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->sleeptime)
+);
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8ffe523..4dbfd04 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1937,6 +1937,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
+	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bdf1883..8e42de9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
-		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
-		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-12-24  7:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-12-22  0:15 [PATCH] Sleep Profiling v3 Arun Sharma
2011-12-22  0:15 ` [PATCH] tracing, sched: Add a new tracepoint for sleeptime Arun Sharma
2011-12-24  7:31   ` [tip:sched/core] sched/tracing: " tip-bot for Arun Sharma

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.