* [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace
@ 2012-10-17  2:56 Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 1/4] tracing: change tracer's integer flags to bool Hiraku Toyooka
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-10-17  2:56 UTC (permalink / raw)
  To: rostedt; +Cc: yrl.pp-manager.tt, linux-kernel

Hi, Steven,

Thank you for your review.
I changed the function name "resize_buffer_even" to 
"resize_buffer_duplicate_size".

(v1: https://lkml.org/lkml/2012/10/2/67)

---

Hiraku Toyooka (4):
      tracing: add description of snapshot to Documentation/trace/ftrace.txt
      tracing: make a snapshot feature available from userspace
      tracing: add a resize function for making one buffer equivalent to the other buffer
      tracing: change tracer's integer flags to bool


 Documentation/trace/ftrace.txt    |   97 ++++++++++++++
 include/linux/ftrace_event.h      |    3 
 kernel/trace/Kconfig              |   11 ++
 kernel/trace/trace.c              |  248 +++++++++++++++++++++++++++++++------
 kernel/trace/trace.h              |    5 -
 kernel/trace/trace_irqsoff.c      |   12 +-
 kernel/trace/trace_sched_wakeup.c |    8 +
 7 files changed, 333 insertions(+), 51 deletions(-)

-- 
Hiraku TOYOOKA
Linux Technology Center
Yokohama Research Laboratory
Hitachi Ltd.


* [PATCH v2 -tip 1/4] tracing: change tracer's integer flags to bool
  2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
@ 2012-10-17  2:56 ` Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 2/4] tracing: add a resize function for making one buffer equivalent to the other buffer Hiraku Toyooka
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-10-17  2:56 UTC (permalink / raw)
  To: rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Hiraku Toyooka, Steven Rostedt,
	Frederic Weisbecker, Ingo Molnar, linux-kernel

print_max and use_max_tr in struct tracer are "int" variables used
only as flags. This is wasteful, so change their type to "bool".

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-kernel@vger.kernel.org
---

 kernel/trace/trace.h              |    4 ++--
 kernel/trace/trace_irqsoff.c      |   12 ++++++------
 kernel/trace/trace_sched_wakeup.c |    8 ++++----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 593debe..0eb6a1a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -285,8 +285,8 @@ struct tracer {
 	int			(*set_flag)(u32 old_flags, u32 bit, int set);
 	struct tracer		*next;
 	struct tracer_flags	*flags;
-	int			print_max;
-	int			use_max_tr;
+	bool			print_max;
+	bool			use_max_tr;
 };
 
 
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index d98ee82..5c823c1 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -604,7 +604,7 @@ static struct tracer irqsoff_tracer __read_mostly =
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -614,7 +614,7 @@ static struct tracer irqsoff_tracer __read_mostly =
 #endif
 	.open           = irqsoff_trace_open,
 	.close          = irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 # define register_irqsoff(trace) register_tracer(&trace)
 #else
@@ -637,7 +637,7 @@ static struct tracer preemptoff_tracer __read_mostly =
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -647,7 +647,7 @@ static struct tracer preemptoff_tracer __read_mostly =
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 # define register_preemptoff(trace) register_tracer(&trace)
 #else
@@ -672,7 +672,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
 	.reset		= irqsoff_tracer_reset,
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header   = irqsoff_print_header,
 	.print_line     = irqsoff_print_line,
 	.flags		= &tracer_flags,
@@ -682,7 +682,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
 #endif
 	.open		= irqsoff_trace_open,
 	.close		= irqsoff_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 # define register_preemptirqsoff(trace) register_tracer(&trace)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 02170c0..43f8abc 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -589,7 +589,7 @@ static struct tracer wakeup_tracer __read_mostly =
 	.reset		= wakeup_tracer_reset,
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,
 	.flags		= &tracer_flags,
@@ -599,7 +599,7 @@ static struct tracer wakeup_tracer __read_mostly =
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 static struct tracer wakeup_rt_tracer __read_mostly =
@@ -610,7 +610,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
 	.start		= wakeup_tracer_start,
 	.stop		= wakeup_tracer_stop,
 	.wait_pipe	= poll_wait_pipe,
-	.print_max	= 1,
+	.print_max	= true,
 	.print_header	= wakeup_print_header,
 	.print_line	= wakeup_print_line,
 	.flags		= &tracer_flags,
@@ -620,7 +620,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
 #endif
 	.open		= wakeup_trace_open,
 	.close		= wakeup_trace_close,
-	.use_max_tr	= 1,
+	.use_max_tr	= true,
 };
 
 __init static int init_wakeup_tracer(void)



* [PATCH v2 -tip 2/4] tracing: add a resize function for making one buffer equivalent to the other buffer
  2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 1/4] tracing: change tracer's integer flags to bool Hiraku Toyooka
@ 2012-10-17  2:56 ` Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-10-17  2:56 UTC (permalink / raw)
  To: rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Hiraku Toyooka, Steven Rostedt,
	Frederic Weisbecker, Ingo Molnar, linux-kernel

Trace buffer sizes are now per-CPU, so there are two patterns when
resizing the buffers:

  (1) resize every per-CPU buffer to the same given size
  (2) resize each per-CPU buffer to match the corresponding buffer
      size in the other trace_array (such as preparing max_tr so
      that it is equivalent in size to global_trace)

__tracing_resize_ring_buffer() can be used for (1), and also
implemented (2) internally for resetting global_trace to its
original size.

(2) was implemented in another place as well, so this patch
consolidates both into a new function - resize_buffer_duplicate_size().
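
As an illustration only (not part of the patch itself), the two resize
patterns correspond to calls like these; the signatures follow the
diff below:

	/* (1) resize every per-CPU buffer to one given size */
	ret = __tracing_resize_ring_buffer(size, RING_BUFFER_ALL_CPUS);

	/* (2) resize max_tr to mirror global_trace's per-CPU sizes */
	ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
					   RING_BUFFER_ALL_CPUS);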

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-kernel@vger.kernel.org
---

 kernel/trace/trace.c |   58 +++++++++++++++++++++++++++-----------------------
 1 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 08acf42..d71eee1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3017,6 +3017,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val)
 		tr->data[cpu]->entries = val;
 }
 
+/* resize @tr's buffer to the size of @size_tr's entries */
+static int resize_buffer_duplicate_size(struct trace_array *tr,
+					struct trace_array *size_tr, int cpu_id)
+{
+	int cpu, ret = 0;
+
+	if (cpu_id == RING_BUFFER_ALL_CPUS) {
+		for_each_tracing_cpu(cpu) {
+			ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu]->entries, cpu);
+			if (ret < 0)
+				break;
+			tr->data[cpu]->entries = size_tr->data[cpu]->entries;
+		}
+	} else {
+		ret = ring_buffer_resize(tr->buffer,
+					size_tr->data[cpu_id]->entries, cpu_id);
+		if (ret == 0)
+			tr->data[cpu_id]->entries =
+				size_tr->data[cpu_id]->entries;
+	}
+
+	return ret;
+}
+
 static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 {
 	int ret;
@@ -3037,23 +3062,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 
 	ret = ring_buffer_resize(max_tr.buffer, size, cpu);
 	if (ret < 0) {
-		int r = 0;
-
-		if (cpu == RING_BUFFER_ALL_CPUS) {
-			int i;
-			for_each_tracing_cpu(i) {
-				r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[i]->entries,
-						i);
-				if (r < 0)
-					break;
-			}
-		} else {
-			r = ring_buffer_resize(global_trace.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-		}
-
+		int r = resize_buffer_duplicate_size(&global_trace,
+						     &global_trace, cpu);
 		if (r < 0) {
 			/*
 			 * AARGH! We are left with different
@@ -3191,17 +3201,11 @@ static int tracing_set_tracer(const char *buf)
 
 	topts = create_trace_option_files(t);
 	if (t->use_max_tr) {
-		int cpu;
 		/* we need to make per cpu buffer sizes equivalent */
-		for_each_tracing_cpu(cpu) {
-			ret = ring_buffer_resize(max_tr.buffer,
-						global_trace.data[cpu]->entries,
-						cpu);
-			if (ret < 0)
-				goto out;
-			max_tr.data[cpu]->entries =
-					global_trace.data[cpu]->entries;
-		}
+		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
+						   RING_BUFFER_ALL_CPUS);
+		if (ret < 0)
+			goto out;
 	}
 
 	if (t->init) {



* [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 1/4] tracing: change tracer's integer flags to bool Hiraku Toyooka
  2012-10-17  2:56 ` [PATCH v2 -tip 2/4] tracing: add a resize function for making one buffer equivalent to the other buffer Hiraku Toyooka
@ 2012-10-17  2:56 ` Hiraku Toyooka
  2012-11-16  1:46   ` Steven Rostedt
  2012-10-17  2:56 ` [PATCH v2 -tip 4/4] tracing: add description of snapshot to Documentation/trace/ftrace.txt Hiraku Toyooka
  2012-11-16  0:16 ` [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Steven Rostedt
  4 siblings, 1 reply; 13+ messages in thread
From: Hiraku Toyooka @ 2012-10-17  2:56 UTC (permalink / raw)
  To: rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Hiraku Toyooka, Steven Rostedt,
	Frederic Weisbecker, Ingo Molnar, Jiri Olsa, Li Zefan,
	linux-kernel

Ftrace has a snapshot feature available from kernel space, and the
latency tracers (e.g. irqsoff) use it. This patch enables user
applications to take a snapshot via debugfs.

Add the following two debugfs files in the "tracing" directory.

  snapshot:
    This is used to take a snapshot and to read the output of the
    snapshot.
     # echo 1 > snapshot
     # cat snapshot

  snapshot_allocate:
    Echoing 1 to "snapshot" allocates the max_tr buffer if it is
    not already allocated, so taking a snapshot may be delayed or
    may fail because of the memory allocation. To avoid that, this
    file provides a means to pre-allocate (or free) the max_tr
    buffer.
     # echo 1 > snapshot_allocate
     [...]
     # echo 1 > snapshot

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: linux-kernel@vger.kernel.org
---

 include/linux/ftrace_event.h |    3 +
 kernel/trace/Kconfig         |   11 ++
 kernel/trace/trace.c         |  190 +++++++++++++++++++++++++++++++++++++++---
 kernel/trace/trace.h         |    1 
 4 files changed, 193 insertions(+), 12 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 642928c..8c32c6e 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -84,6 +84,9 @@ struct trace_iterator {
 	long			idx;
 
 	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
 };
 
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4cea4f4..73d56d5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -102,6 +102,17 @@ config RING_BUFFER_ALLOW_SWAP
 	 Allow the use of ring_buffer_swap_cpu.
 	 Adds a very slight overhead to tracing when enabled.
 
+config TRACER_SNAPSHOT
+	bool
+	default y
+	select TRACER_MAX_TRACE
+	help
+	  Allow tracing users to take a snapshot of the current buffer using the
+	  ftrace interface, e.g.:
+
+	      echo 1 > /sys/kernel/debug/tracing/snapshot
+	      cat snapshot
+
 # All tracer options should select GENERIC_TRACER. For those options that are
 # enabled by all tracers (context switch and event tracer) they select TRACING.
 # This allows those options to appear when no other tracer is selected. But the
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d71eee1..b0d097e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -699,7 +699,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
+	if (!current_trace->allocated_snapshot) {
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -729,7 +729,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
+	if (!current_trace->allocated_snapshot) {
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -1902,7 +1902,8 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	}
 	mutex_unlock(&trace_types_lock);
 
-	atomic_inc(&trace_record_cmdline_disabled);
+	if (!iter->snapshot)
+		atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -1941,7 +1942,8 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	atomic_dec(&trace_record_cmdline_disabled);
+	if (!iter->snapshot)
+		atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2375,7 +2377,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, int snapshot)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2408,10 +2410,11 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace && current_trace->print_max)
+	if ((current_trace && current_trace->print_max) || snapshot)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
+	iter->snapshot = !!snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2424,8 +2427,9 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (ring_buffer_overruns(iter->tr->buffer))
 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
 
-	/* stop the trace while dumping */
-	tracing_stop();
+	/* stop the trace while dumping if we are not opening "snapshot" */
+	if (!iter->snapshot)
+		tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2488,8 +2492,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	/* reenable tracing if it was previously enabled */
-	tracing_start();
+	if (!iter->snapshot)
+		/* reenable tracing if it was previously enabled */
+		tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2517,7 +2522,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file);
+		iter = __tracing_open(inode, file, 0);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3186,7 +3191,8 @@ static int tracing_set_tracer(const char *buf)
 	trace_branch_disable();
 	if (current_trace && current_trace->reset)
 		current_trace->reset(tr);
-	if (current_trace && current_trace->use_max_tr) {
+	if (current_trace && current_trace->allocated_snapshot) {
+		tracing_reset_online_cpus(&max_tr);
 		/*
 		 * We don't free the ring buffer. instead, resize it because
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3194,6 +3200,7 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
+		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
@@ -3206,6 +3213,7 @@ static int tracing_set_tracer(const char *buf)
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
+		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -4029,6 +4037,139 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_iterator *iter;
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, 1);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	}
+	return ret;
+}
+
+static ssize_t tracing_snapshot_read(struct file *filp, char __user *ubuf,
+				     size_t cnt, loff_t *ppos)
+{
+	ssize_t ret = 0;
+
+	mutex_lock(&trace_types_lock);
+	if (current_trace && current_trace->use_max_tr)
+		ret = -EBUSY;
+	mutex_unlock(&trace_types_lock);
+	if (ret < 0)
+		return ret;
+
+	ret = seq_read(filp, ubuf, cnt, ppos);
+
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	unsigned long val = 0;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (!current_trace->allocated_snapshot) {
+		/* allocate spare buffer for snapshot */
+		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
+						   RING_BUFFER_ALL_CPUS);
+		if (ret < 0)
+			goto out;
+		current_trace->allocated_snapshot = true;
+	}
+
+	if (val) {
+		local_irq_disable();
+		update_max_tr(&global_trace, current, smp_processor_id());
+		local_irq_enable();
+	} else
+		tracing_reset_online_cpus(&max_tr);
+
+	*ppos += cnt;
+	ret = cnt;
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_allocate_read(struct file *filp, char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = sprintf(buf, "%d\n", current_trace->allocated_snapshot);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_snapshot_allocate_write(struct file *filp, const char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (current_trace->allocated_snapshot == !val) {
+		if (val) {
+			/* allocate spare buffer for snapshot */
+			ret = resize_buffer_duplicate_size(&max_tr,
+					&global_trace, RING_BUFFER_ALL_CPUS);
+			if (ret < 0)
+				goto out;
+		} else {
+			tracing_reset_online_cpus(&max_tr);
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+			set_buffer_entries(&max_tr, 1);
+		}
+		current_trace->allocated_snapshot = !!val;
+	}
+
+	*ppos += cnt;
+	ret = cnt;
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -4092,6 +4233,23 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= tracing_snapshot_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_seek,
+	.release	= tracing_release,
+};
+
+static const struct file_operations snapshot_allocate_fops = {
+	.open           = tracing_open_generic,
+	.read           = tracing_snapshot_allocate_read,
+	.write          = tracing_snapshot_allocate_write,
+	.llseek         = generic_file_llseek,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4878,6 +5036,14 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+	trace_create_file("snapshot", 0644, d_tracer,
+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+
+	trace_create_file("snapshot_allocate", 0644, d_tracer,
+			  NULL, &snapshot_allocate_fops);
+#endif
+
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0eb6a1a..66a8631 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,6 +287,7 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
+	bool			allocated_snapshot;
 };
 
 



* [PATCH v2 -tip 4/4] tracing: add description of snapshot to Documentation/trace/ftrace.txt
  2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
                   ` (2 preceding siblings ...)
  2012-10-17  2:56 ` [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
@ 2012-10-17  2:56 ` Hiraku Toyooka
  2012-11-16  0:16 ` [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Steven Rostedt
  4 siblings, 0 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-10-17  2:56 UTC (permalink / raw)
  To: rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Hiraku Toyooka, Steven Rostedt,
	Frederic Weisbecker, Ingo Molnar, Rob Landley, linux-doc,
	linux-kernel

This patch adds a description of the snapshot feature to the ftrace
documentation, covering what a snapshot is and how to use it.

Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Rob Landley <rob@landley.net>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---

 Documentation/trace/ftrace.txt |   97 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 97 insertions(+), 0 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 6f51fed..68ac294 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1842,6 +1842,103 @@ an error.
  # cat buffer_size_kb
 85
 
+Snapshot
+--------
+CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
+available to all non-latency tracers. (Latency tracers which
+record max latency, such as "irqsoff" or "wakeup", can't use
+this feature, since they already use the snapshot
+mechanism internally.)
+
+Snapshot preserves a trace buffer at a particular point in time
+without stopping tracing. Ftrace swaps the current buffer with a
+spare buffer, and tracing continues in the (previous) spare
+buffer.
+
+The following debugfs files in "tracing" are related to this
+feature:
+
+  snapshot:
+
+	This is used to take a snapshot and to read the output
+	of the snapshot. Echo 1 into this file to allocate a
+	spare buffer and to take a snapshot, then read the
+	snapshot from this file in the same format as "trace"
+	(described above in the section "The File System").
+	Reading the snapshot and tracing can run in parallel.
+	Echoing 0 erases the snapshot contents.
+
+  snapshot_allocate:
+
+	This is used to pre-allocate or free a spare buffer.
+	Echo 1 into this file to pre-allocate a spare buffer if
+	you don't want the next snapshot to fail due to a memory
+	allocation failure, or if you don't want to lose older
+	trace data while the buffer is being allocated. Echo 0
+	to free the spare buffer when the snapshot is no longer
+	needed. If you intend to take another snapshot, you can
+	keep and reuse the buffer; just erase the snapshot
+	contents by echoing 1 into the "snapshot" file instead
+	of freeing the buffer.
+
+	Reads from this file show whether the spare buffer is
+	allocated. When current_tracer is changed, the allocated
+	spare buffer is freed. If the new tracer is one of the
+	latency tracers, this value becomes 1 and can't be
+	changed; otherwise the value starts at 0.
+
+
+Here is an example of using the snapshot feature.
+
+ # echo 1 > snapshot_allocate (if you want to pre-allocate the spare buffer)
+ # echo 1 > events/sched/enable
+ # echo 1 > snapshot
+ # cat snapshot
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 71/71   #P:8
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+          <idle>-0     [005] d...  2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
+           sleep-2242  [005] d...  2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
+[...]
+          <idle>-0     [002] d...  2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
+ # cat trace
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 77/77   #P:8
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+          <idle>-0     [007] d...  2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
+ snapshot-test-2-2229  [002] d...  2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
+[...]
+
+
+If you try to use the snapshot feature when the current tracer
+is one of the latency tracers, you will get the following results.
+
+ # echo wakeup > current_tracer
+ # cat snapshot_allocate
+1
+ # echo 1 > snapshot_allocate
+bash: echo: write error: Device or resource busy
+ # echo 1 > snapshot
+bash: echo: write error: Device or resource busy
+ # cat snapshot
+cat: snapshot: Device or resource busy
+
 -----------
 
 More details can be found in the source code, in the


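(Conceptually, the snapshot documented above is just a buffer-pointer
exchange, along the lines of what update_max_tr() does in
kernel/trace/trace.c. A simplified sketch, with a hypothetical helper
name:)

	/* hypothetical helper; mirrors update_max_tr()'s pointer swap */
	static void snapshot_swap(struct trace_array *tr, struct trace_array *max)
	{
		struct ring_buffer *buf = tr->buffer;

		tr->buffer = max->buffer;
		max->buffer = buf;
		/* tracing continues in what used to be the spare buffer */
	}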

* Re: [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace
  2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
                   ` (3 preceding siblings ...)
  2012-10-17  2:56 ` [PATCH v2 -tip 4/4] tracing: add description of snapshot to Documentation/trace/ftrace.txt Hiraku Toyooka
@ 2012-11-16  0:16 ` Steven Rostedt
  4 siblings, 0 replies; 13+ messages in thread
From: Steven Rostedt @ 2012-11-16  0:16 UTC (permalink / raw)
  To: Hiraku Toyooka; +Cc: yrl.pp-manager.tt, linux-kernel

On Wed, 2012-10-17 at 11:56 +0900, Hiraku Toyooka wrote:
> Hi, Steven,
> 
> Thank you for your review.
> I changed the function name "resize_buffer_even" to 
> "resize_buffer_duplicate_size".
> 
> (v1: https://lkml.org/lkml/2012/10/2/67)
> 

OK, I finally got around to playing with it. There are some things I
started to fix, but decided I'll just throw them back at you ;-)

I'll comment on the individual patches.

-- Steve




* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-10-17  2:56 ` [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
@ 2012-11-16  1:46   ` Steven Rostedt
  2012-11-30  5:46     ` Hiraku Toyooka
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2012-11-16  1:46 UTC (permalink / raw)
  To: Hiraku Toyooka
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

On Wed, 2012-10-17 at 11:56 +0900, Hiraku Toyooka wrote:
> Ftrace has a snapshot feature available from kernel space, and the
> latency tracers (e.g. irqsoff) use it. This patch enables user
> applications to take a snapshot via debugfs.
> 
> Add the following two debugfs files in the "tracing" directory.
> 
>   snapshot:
>     This is used to take a snapshot and to read the output of the
>     snapshot.
>      # echo 1 > snapshot
>      # cat snapshot
> 
>   snapshot_allocate:
>     Echoing 1 to "snapshot" allocates the max_tr buffer if it is
>     not already allocated, so taking a snapshot may be delayed or
>     may fail because of the memory allocation. To avoid that, this
>     file provides a means to pre-allocate (or free) the max_tr
>     buffer.
>      # echo 1 > snapshot_allocate
>      [...]
>      # echo 1 > snapshot

I was thinking about this some more, and I don't like the
snapshot_allocate part. Also, I think the snapshot should not be
allocated by default, and not used until the user explicitly asks for
it.

Have this:

---
 # cat snapshot
Snapshot is not allocated. To allocate it write the ASCII "1" into
this file:

  echo 1 > snapshot

This will allocate the buffer for you. To free the snapshot echo "0"
into this file.

  echo "0" > snapshot

Anything else will reset the snapshot if it is allocated, or return
EINVAL if it is not allocated.
---


Also we can add a "trace_snapshot" to the kernel parameters to have it
allocated on boot. But I can add that if you update these patches.
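
(A minimal sketch of such a boot parameter, assuming a flag that the
snapshot code would consult during init; the parameter name, the
handler, and the allocate_snapshot flag are illustrative, not code
from this thread:)

	static bool allocate_snapshot __initdata;	/* illustrative flag */

	static int __init boot_alloc_snapshot(char *str)
	{
		allocate_snapshot = true;
		return 1;
	}
	__setup("trace_snapshot", boot_alloc_snapshot);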



> 
> Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Li Zefan <lizf@cn.fujitsu.com>
> Cc: linux-kernel@vger.kernel.org
> ---
> 
>  include/linux/ftrace_event.h |    3 +
>  kernel/trace/Kconfig         |   11 ++
>  kernel/trace/trace.c         |  190 +++++++++++++++++++++++++++++++++++++++---
>  kernel/trace/trace.h         |    1 
>  4 files changed, 193 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 642928c..8c32c6e 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -84,6 +84,9 @@ struct trace_iterator {
>  	long			idx;
>  
>  	cpumask_var_t		started;
> +
> +	/* it's true when current open file is snapshot */
> +	bool			snapshot;
>  };
>  
> 
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 4cea4f4..73d56d5 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -102,6 +102,17 @@ config RING_BUFFER_ALLOW_SWAP
>  	 Allow the use of ring_buffer_swap_cpu.
>  	 Adds a very slight overhead to tracing when enabled.
>  

Move this config down below FTRACE_SYSCALLS and give it a prompt, and
do not make it default y.


> +config TRACER_SNAPSHOT
> +	bool

	bool "Create a snapshot trace buffer"


> +	default y
> +	select TRACER_MAX_TRACE
> +	help
> +	  Allow tracing users to take a snapshot of the current buffer using the
> +	  ftrace interface, e.g.:
> +
> +	      echo 1 > /sys/kernel/debug/tracing/snapshot
> +	      cat snapshot
> +
>  # All tracer options should select GENERIC_TRACER. For those options that are
>  # enabled by all tracers (context switch and event tracer) they select TRACING.
>  # This allows those options to appear when no other tracer is selected. But the
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index d71eee1..b0d097e 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -699,7 +699,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
>  		return;
>  
>  	WARN_ON_ONCE(!irqs_disabled());
> -	if (!current_trace->use_max_tr) {
> +	if (!current_trace->allocated_snapshot) {
>  		WARN_ON_ONCE(1);
>  		return;
>  	}
> @@ -729,7 +729,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
>  		return;
>  
>  	WARN_ON_ONCE(!irqs_disabled());
> -	if (!current_trace->use_max_tr) {
> +	if (!current_trace->allocated_snapshot) {
>  		WARN_ON_ONCE(1);
>  		return;
>  	}
> @@ -1902,7 +1902,8 @@ static void *s_start(struct seq_file *m, loff_t *pos)
>  	}
>  	mutex_unlock(&trace_types_lock);
>  
> -	atomic_inc(&trace_record_cmdline_disabled);
> +	if (!iter->snapshot)
> +		atomic_inc(&trace_record_cmdline_disabled);
>  
>  	if (*pos != iter->pos) {
>  		iter->ent = NULL;
> @@ -1941,7 +1942,8 @@ static void s_stop(struct seq_file *m, void *p)
>  {
>  	struct trace_iterator *iter = m->private;
>  
> -	atomic_dec(&trace_record_cmdline_disabled);
> +	if (!iter->snapshot)
> +		atomic_dec(&trace_record_cmdline_disabled);
>  	trace_access_unlock(iter->cpu_file);
>  	trace_event_read_unlock();
>  }
> @@ -2375,7 +2377,7 @@ static const struct seq_operations tracer_seq_ops = {
>  };
>  
>  static struct trace_iterator *
> -__tracing_open(struct inode *inode, struct file *file)
> +__tracing_open(struct inode *inode, struct file *file, int snapshot)

	bool snapshot

>  {
>  	long cpu_file = (long) inode->i_private;
>  	struct trace_iterator *iter;
> @@ -2408,10 +2410,11 @@ __tracing_open(struct inode *inode, struct file *file)
>  	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
>  		goto fail;
>  
> -	if (current_trace && current_trace->print_max)
> +	if ((current_trace && current_trace->print_max) || snapshot)
>  		iter->tr = &max_tr;
>  	else
>  		iter->tr = &global_trace;
> +	iter->snapshot = !!snapshot;

Get rid of the !!

>  	iter->pos = -1;
>  	mutex_init(&iter->mutex);
>  	iter->cpu_file = cpu_file;
> @@ -2424,8 +2427,9 @@ __tracing_open(struct inode *inode, struct file *file)
>  	if (ring_buffer_overruns(iter->tr->buffer))
>  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
>  
> -	/* stop the trace while dumping */
> -	tracing_stop();
> +	/* stop the trace while dumping if we are not opening "snapshot" */
> +	if (!iter->snapshot)
> +		tracing_stop();
>  
>  	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
>  		for_each_tracing_cpu(cpu) {
> @@ -2488,8 +2492,9 @@ static int tracing_release(struct inode *inode, struct file *file)
>  	if (iter->trace && iter->trace->close)
>  		iter->trace->close(iter);
>  
> -	/* reenable tracing if it was previously enabled */
> -	tracing_start();
> +	if (!iter->snapshot)
> +		/* reenable tracing if it was previously enabled */
> +		tracing_start();
>  	mutex_unlock(&trace_types_lock);
>  
>  	mutex_destroy(&iter->mutex);
> @@ -2517,7 +2522,7 @@ static int tracing_open(struct inode *inode, struct file *file)
>  	}
>  
>  	if (file->f_mode & FMODE_READ) {
> -		iter = __tracing_open(inode, file);
> +		iter = __tracing_open(inode, file, 0);

	, false)

>  		if (IS_ERR(iter))
>  			ret = PTR_ERR(iter);
>  		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
> @@ -3186,7 +3191,8 @@ static int tracing_set_tracer(const char *buf)
>  	trace_branch_disable();
>  	if (current_trace && current_trace->reset)
>  		current_trace->reset(tr);
> -	if (current_trace && current_trace->use_max_tr) {
> +	if (current_trace && current_trace->allocated_snapshot) {
> +		tracing_reset_online_cpus(&max_tr);

max_tr->buffer could be NULL.

Either test here, or better yet, put the test into
tracing_reset_online_cpus().

	if (!buffer)
		return;

>  		/*
>  		 * We don't free the ring buffer. instead, resize it because
>  		 * The max_tr ring buffer has some state (e.g. ring->clock) and
> @@ -3194,6 +3200,7 @@ static int tracing_set_tracer(const char *buf)
>  		 */
>  		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
>  		set_buffer_entries(&max_tr, 1);
> +		current_trace->allocated_snapshot = false;
>  	}
>  	destroy_trace_option_files(topts);
>  
> @@ -3206,6 +3213,7 @@ static int tracing_set_tracer(const char *buf)
>  						   RING_BUFFER_ALL_CPUS);
>  		if (ret < 0)
>  			goto out;
> +		t->allocated_snapshot = true;
>  	}
>  
>  	if (t->init) {
> @@ -4029,6 +4037,139 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
>  	return single_open(file, tracing_clock_show, NULL);
>  }
>  
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +static int tracing_snapshot_open(struct inode *inode, struct file *file)
> +{
> +	struct trace_iterator *iter;
> +	int ret = 0;
> +
> +	if (file->f_mode & FMODE_READ) {
> +		iter = __tracing_open(inode, file, 1);
> +		if (IS_ERR(iter))
> +			ret = PTR_ERR(iter);
> +	}
> +	return ret;
> +}
> +
> +static ssize_t tracing_snapshot_read(struct file *filp, char __user *ubuf,
> +				     size_t cnt, loff_t *ppos)
> +{
> +	ssize_t ret = 0;
> +
> +	mutex_lock(&trace_types_lock);
> +	if (current_trace && current_trace->use_max_tr)
> +		ret = -EBUSY;
> +	mutex_unlock(&trace_types_lock);

I don't like this, as it is racy. The current tracer could change after
the unlock, and you're back to the problem.

Now, what we may be able to do (though it would take a little checking
of the lock ordering with trace_access_lock() and
trace_event_read_lock()) is take the trace_types_lock mutex in both
s_start() and s_stop() and do the check in s_start() if iter->snapshot
is true.

This means we can nuke the tracing_snapshot_read() and just use
seq_read() directly.

> +	if (ret < 0)
> +		return ret;
> +
> +	ret = seq_read(filp, ubuf, cnt, ppos);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
> +		       loff_t *ppos)
> +{
> +	unsigned long val = 0;
> +	int ret;
> +
> +	ret = tracing_update_buffers();
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&trace_types_lock);
> +
> +	if (current_trace->use_max_tr) {
> +		ret = -EBUSY;
> +		goto out;
> +	}

Again, I would check the value of 'val' and if it is '1' then allocate
the buffers, '0' free them, and anything else just clear them or return
EINVAL if the buffers are not allocated.

If it is '1' and the buffers are already allocated, then clear them too.

Then we can just remove the 'snapshot_allocate' file.

Thanks!

-- Steve

> +
> +	if (!current_trace->allocated_snapshot) {
> +		/* allocate spare buffer for snapshot */
> +		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
> +						   RING_BUFFER_ALL_CPUS);
> +		if (ret < 0)
> +			goto out;
> +		current_trace->allocated_snapshot = true;
> +	}
> +
> +	if (val) {
> +		local_irq_disable();
> +		update_max_tr(&global_trace, current, smp_processor_id());
> +		local_irq_enable();
> +	} else
> +		tracing_reset_online_cpus(&max_tr);
> +
> +	*ppos += cnt;
> +	ret = cnt;
> +out:
> +	mutex_unlock(&trace_types_lock);
> +	return ret;
> +}
> +
> +static ssize_t
> +tracing_snapshot_allocate_read(struct file *filp, char __user *ubuf,
> +			       size_t cnt, loff_t *ppos)
> +{
> +	char buf[64];
> +	int r;
> +
> +	r = sprintf(buf, "%d\n", current_trace->allocated_snapshot);
> +	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
> +}
> +
> +static ssize_t
> +tracing_snapshot_allocate_write(struct file *filp, const char __user *ubuf,
> +				size_t cnt, loff_t *ppos)
> +{
> +	unsigned long val;
> +	int ret;
> +
> +	ret = tracing_update_buffers();
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&trace_types_lock);
> +
> +	if (current_trace->use_max_tr) {
> +		ret = -EBUSY;
> +		goto out;
> +	}
> +
> +	if (current_trace->allocated_snapshot == !val) {
> +		if (val) {
> +			/* allocate spare buffer for snapshot */
> +			ret = resize_buffer_duplicate_size(&max_tr,
> +					&global_trace, RING_BUFFER_ALL_CPUS);
> +			if (ret < 0)
> +				goto out;
> +		} else {
> +			tracing_reset_online_cpus(&max_tr);
> +			ring_buffer_resize(max_tr.buffer, 1,
> +					   RING_BUFFER_ALL_CPUS);
> +			set_buffer_entries(&max_tr, 1);
> +		}
> +		current_trace->allocated_snapshot = !!val;
> +	}
> +
> +	*ppos += cnt;
> +	ret = cnt;
> +out:
> +	mutex_unlock(&trace_types_lock);
> +	return ret;
> +}
> +#endif /* CONFIG_TRACER_SNAPSHOT */
> +
>  static const struct file_operations tracing_max_lat_fops = {
>  	.open		= tracing_open_generic,
>  	.read		= tracing_max_lat_read,
> @@ -4092,6 +4233,23 @@ static const struct file_operations trace_clock_fops = {
>  	.write		= tracing_clock_write,
>  };
>  
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +static const struct file_operations snapshot_fops = {
> +	.open		= tracing_snapshot_open,
> +	.read		= tracing_snapshot_read,
> +	.write		= tracing_snapshot_write,
> +	.llseek		= tracing_seek,
> +	.release	= tracing_release,
> +};
> +
> +static const struct file_operations snapshot_allocate_fops = {
> +	.open           = tracing_open_generic,
> +	.read           = tracing_snapshot_allocate_read,
> +	.write          = tracing_snapshot_allocate_write,
> +	.llseek         = generic_file_llseek,
> +};
> +#endif /* CONFIG_TRACER_SNAPSHOT */
> +
>  struct ftrace_buffer_info {
>  	struct trace_array	*tr;
>  	void			*spare;
> @@ -4878,6 +5036,14 @@ static __init int tracer_init_debugfs(void)
>  			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
>  #endif
>  
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	trace_create_file("snapshot", 0644, d_tracer,
> +			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
> +
> +	trace_create_file("snapshot_allocate", 0644, d_tracer,
> +			  NULL, &snapshot_allocate_fops);
> +#endif
> +
>  	create_trace_options_dir();
>  
>  	for_each_tracing_cpu(cpu)
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 0eb6a1a..66a8631 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -287,6 +287,7 @@ struct tracer {
>  	struct tracer_flags	*flags;
>  	bool			print_max;
>  	bool			use_max_tr;
> +	bool			allocated_snapshot;
>  };
>  
> 




* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-11-16  1:46   ` Steven Rostedt
@ 2012-11-30  5:46     ` Hiraku Toyooka
  2012-11-30 14:17       ` Steven Rostedt
  0 siblings, 1 reply; 13+ messages in thread
From: Hiraku Toyooka @ 2012-11-30  5:46 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

Hi, Steven,

Thank you for your review.

(2012/11/16 10:46), Steven Rostedt wrote:
[snip]
 > I was thinking about this some more, and I don't like the
 > snapshot_allocate part. Also, I think the snapshot should not be
 > allocated by default, and not used until the user explicitly asks for
 > it.
 >
 > Have this:
 >
 > ---
 >  # cat snapshot
 > Snapshot is not allocated. To allocate it write the ASCII "1" into
 > this file:
 >
 >   echo 1 > snapshot
 >
 > This will allocate the buffer for you. To free the snapshot echo "0"
 > into this file.
 >
 >   echo "0" > snapshot
 >
 > Anything else will reset the snapshot if it is allocated, or return
 > EINVAL if it is not allocated.
 > ---
 >

Your idea about "snapshot" is like the following table, isn't it?

  status\input |     0      |     1      |    else    |
--------------+------------+------------+------------+
not allocated |   EINVAL   | alloc+swap |   EINVAL   |
--------------+------------+------------+------------+
   allocated   |    free    | clear+swap |   clear    |
--------------+------------+------------+------------+

I think it is almost OK, but there is a problem.
When we echo "1" to the allocated snapshot, the clear operation adds
some delay, because the time cost of tracing_reset_online_cpus() is
proportional to the number of CPUs.
(It takes 72 ms in my 8-CPU environment.)

So, when the snapshot has already been cleared by echoing an "else"
value, we can avoid the delay on echoing "1" by keeping a "cleared"
status internally. For example, we could add a "cleared" flag to
struct tracer. What do you think about it?

 >
 > Also we can add a "trace_snapshot" to the kernel parameters to have it
 > allocated on boot. But I can add that if you update these patches.
 >

OK, I'll update my patches.

[snip]
 >> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
 >> index 4cea4f4..73d56d5 100644
 >> --- a/kernel/trace/Kconfig
 >> +++ b/kernel/trace/Kconfig
 >> @@ -102,6 +102,17 @@ config RING_BUFFER_ALLOW_SWAP
 >>       Allow the use of ring_buffer_swap_cpu.
 >>       Adds a very slight overhead to tracing when enabled.
 >>
 >
 > Move this config down below FTRACE_SYSCALLS and give it a prompt, and
 > do not make it default y.
 >

I'll modify it.

 >
 >> +config TRACER_SNAPSHOT
 >> +    bool
 >
 >     bool "Create a snapshot trace buffer"
 >

I'll fix it.


 >
 >> +    default y
 >> +    select TRACER_MAX_TRACE
 >> +    help
 >> +      Allow tracing users to take a snapshot of the current buffer using the
 >> +      ftrace interface, e.g.:
 >> +
 >> +          echo 1 > /sys/kernel/debug/tracing/snapshot
 >> +          cat snapshot
 >> +
[snip]
 >>  static struct trace_iterator *
 >> -__tracing_open(struct inode *inode, struct file *file)
 >> +__tracing_open(struct inode *inode, struct file *file, int snapshot)
 >
 >     bool snapshot
 >

I'll fix it.


 >>  {
 >>      long cpu_file = (long) inode->i_private;
 >>      struct trace_iterator *iter;
 >> @@ -2408,10 +2410,11 @@ __tracing_open(struct inode *inode, struct file *file)
 >>      if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 >>          goto fail;
 >>
 >> -    if (current_trace && current_trace->print_max)
 >> +    if ((current_trace && current_trace->print_max) || snapshot)
 >>          iter->tr = &max_tr;
 >>      else
 >>          iter->tr = &global_trace;
 >> +    iter->snapshot = !!snapshot;
 >
 > Get rid of the !!
 >

I'll fix it.

[snip]
 >> @@ -2517,7 +2522,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 >>      }
 >>
 >>      if (file->f_mode & FMODE_READ) {
 >> -        iter = __tracing_open(inode, file);
 >> +        iter = __tracing_open(inode, file, 0);
 >
 >     , false)
 >

I'll fix it.

 >>          if (IS_ERR(iter))
 >>              ret = PTR_ERR(iter);
 >>          else if (trace_flags & TRACE_ITER_LATENCY_FMT)
 >> @@ -3186,7 +3191,8 @@ static int tracing_set_tracer(const char *buf)
 >>      trace_branch_disable();
 >>      if (current_trace && current_trace->reset)
 >>          current_trace->reset(tr);
 >> -    if (current_trace && current_trace->use_max_tr) {
 >> +    if (current_trace && current_trace->allocated_snapshot) {
 >> +        tracing_reset_online_cpus(&max_tr);
 >
 > max_tr->buffer could be NULL.
 >
 > Either test here, or better yet, put the test into
 > tracing_reset_online_cpus().
 >
 >     if (!buffer)
 >         return;
 >

I see. I'll add the test to tracing_reset_online_cpus(). Should I make
it a separate patch?

[snip]
 >> +static ssize_t tracing_snapshot_read(struct file *filp, char __user *ubuf,
 >> +                     size_t cnt, loff_t *ppos)
 >> +{
 >> +    ssize_t ret = 0;
 >> +
 >> +    mutex_lock(&trace_types_lock);
 >> +    if (current_trace && current_trace->use_max_tr)
 >> +        ret = -EBUSY;
 >> +    mutex_unlock(&trace_types_lock);
 >
 > I don't like this, as it is racy. The current tracer could change after
 > the unlock, and you're back to the problem.
 >

You're right...
This is racy.

 > Now, what we may be able to do (though it would take a little checking
 > of the lock ordering with trace_access_lock() and
 > trace_event_read_lock()) is take the trace_types_lock mutex in both
 > s_start() and s_stop() and do the check in s_start() if iter->snapshot
 > is true.
 >

If I catch your meaning, do s_start() and s_stop() become like the
following code?
(Now, s_start() is used from two files - "trace" and "snapshot" - so we
should change the static "old_tracer" to a per-open-file variable.)

static void *s_start(struct seq_file *m, loff_t *pos)
{
      struct trace_iterator *iter = m->private;
-    static struct tracer *old_tracer;
...
      /* copy the tracer to avoid using a global lock all around */
      mutex_lock(&trace_types_lock);
-    if (unlikely(old_tracer != current_trace && current_trace)) {
-        old_tracer = current_trace;
+    if (unlikely(iter->old_tracer != current_trace && current_trace)) {
+        iter->old_tracer = current_trace;
          *iter->trace = *current_trace;
      }
      mutex_unlock(&trace_types_lock);

+    if (iter->snapshot && iter->trace->use_max_tr)
+        return ERR_PTR(-EBUSY);
+
...
}

static void s_stop(struct seq_file *m, void *p)
{
      struct trace_iterator *iter = m->private;

+    if (iter->snapshot && iter->trace->use_max_tr)
+        return;
...
}

 > This means we can nuke the tracing_snapshot_read() and just use
 > seq_read() directly.
 >

Yes, I see.

Thanks,
Hiraku Toyooka


 >> +    if (ret < 0)
 >> +        return ret;
 >> +
 >> +    ret = seq_read(filp, ubuf, cnt, ppos);
 >> +
 >> +    return ret;
 >> +}
 >> +
 >> +static ssize_t
 >> +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
 >> +               loff_t *ppos)
 >> +{
 >> +    unsigned long val = 0;
 >> +    int ret;
 >> +
 >> +    ret = tracing_update_buffers();
 >> +    if (ret < 0)
 >> +        return ret;
 >> +
 >> +    ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 >> +    if (ret)
 >> +        return ret;
 >> +
 >> +    mutex_lock(&trace_types_lock);
 >> +
 >> +    if (current_trace->use_max_tr) {
 >> +        ret = -EBUSY;
 >> +        goto out;
 >> +    }
 >
 > Again, I would check the value of 'val' and if it is '1' then allocate
 > the buffers, '0' free them, and anything else just clear them or return
 > EINVAL if the buffers are not allocated.
 >
 > If it is '1' and the buffers are already allocated, then clear them too.
 >
 > Then we can just remove the 'snapshot_allocate' file.
 >
 > Thanks!
 >
 > -- Steve
 >
 >> +
 >> +    if (!current_trace->allocated_snapshot) {
 >> +        /* allocate spare buffer for snapshot */
 >> +        ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
 >> +                           RING_BUFFER_ALL_CPUS);
 >> +        if (ret < 0)
 >> +            goto out;
 >> +        current_trace->allocated_snapshot = true;
 >> +    }
 >> +
 >> +    if (val) {
 >> +        local_irq_disable();
 >> +        update_max_tr(&global_trace, current, smp_processor_id());
 >> +        local_irq_enable();
 >> +    } else
 >> +        tracing_reset_online_cpus(&max_tr);
 >> +
 >> +    *ppos += cnt;
 >> +    ret = cnt;
 >> +out:
 >> +    mutex_unlock(&trace_types_lock);
 >> +    return ret;
 >> +}
 >> +
 >> +static ssize_t
 >> +tracing_snapshot_allocate_read(struct file *filp, char __user *ubuf,
 >> +                   size_t cnt, loff_t *ppos)
 >> +{
 >> +    char buf[64];
 >> +    int r;
 >> +
 >> +    r = sprintf(buf, "%d\n", current_trace->allocated_snapshot);
 >> +    return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 >> +}
 >> +
 >> +static ssize_t
 >> +tracing_snapshot_allocate_write(struct file *filp, const char __user *ubuf,
 >> +                size_t cnt, loff_t *ppos)
 >> +{
 >> +    unsigned long val;
 >> +    int ret;
 >> +
 >> +    ret = tracing_update_buffers();
 >> +    if (ret < 0)
 >> +        return ret;
 >> +
 >> +    ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 >> +    if (ret)
 >> +        return ret;
 >> +
 >> +    mutex_lock(&trace_types_lock);
 >> +
 >> +    if (current_trace->use_max_tr) {
 >> +        ret = -EBUSY;
 >> +        goto out;
 >> +    }
 >> +
 >> +    if (current_trace->allocated_snapshot == !val) {
 >> +        if (val) {
 >> +            /* allocate spare buffer for snapshot */
 >> +            ret = resize_buffer_duplicate_size(&max_tr,
 >> +                    &global_trace, RING_BUFFER_ALL_CPUS);
 >> +            if (ret < 0)
 >> +                goto out;
 >> +        } else {
 >> +            tracing_reset_online_cpus(&max_tr);
 >> +            ring_buffer_resize(max_tr.buffer, 1,
 >> +                       RING_BUFFER_ALL_CPUS);
 >> +            set_buffer_entries(&max_tr, 1);
 >> +        }
 >> +        current_trace->allocated_snapshot = !!val;
 >> +    }
 >> +
 >> +    *ppos += cnt;
 >> +    ret = cnt;
 >> +out:
 >> +    mutex_unlock(&trace_types_lock);
 >> +    return ret;
 >> +}
 >> +#endif /* CONFIG_TRACER_SNAPSHOT */
 >> +
 >>  static const struct file_operations tracing_max_lat_fops = {
 >>      .open        = tracing_open_generic,
 >>      .read        = tracing_max_lat_read,
 >> @@ -4092,6 +4233,23 @@ static const struct file_operations trace_clock_fops = {
 >>      .write        = tracing_clock_write,
 >>  };
 >>
 >> +#ifdef CONFIG_TRACER_SNAPSHOT
 >> +static const struct file_operations snapshot_fops = {
 >> +    .open        = tracing_snapshot_open,
 >> +    .read        = tracing_snapshot_read,
 >> +    .write        = tracing_snapshot_write,
 >> +    .llseek        = tracing_seek,
 >> +    .release    = tracing_release,
 >> +};
 >> +
 >> +static const struct file_operations snapshot_allocate_fops = {
 >> +    .open           = tracing_open_generic,
 >> +    .read           = tracing_snapshot_allocate_read,
 >> +    .write          = tracing_snapshot_allocate_write,
 >> +    .llseek         = generic_file_llseek,
 >> +};
 >> +#endif /* CONFIG_TRACER_SNAPSHOT */
 >> +
 >>  struct ftrace_buffer_info {
 >>      struct trace_array    *tr;
 >>      void            *spare;
 >> @@ -4878,6 +5036,14 @@ static __init int tracer_init_debugfs(void)
 >>              &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 >>  #endif
 >>
 >> +#ifdef CONFIG_TRACER_SNAPSHOT
 >> +    trace_create_file("snapshot", 0644, d_tracer,
 >> +              (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
 >> +
 >> +    trace_create_file("snapshot_allocate", 0644, d_tracer,
 >> +              NULL, &snapshot_allocate_fops);
 >> +#endif
 >> +
 >>      create_trace_options_dir();
 >>
 >>      for_each_tracing_cpu(cpu)
 >> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
 >> index 0eb6a1a..66a8631 100644
 >> --- a/kernel/trace/trace.h
 >> +++ b/kernel/trace/trace.h
 >> @@ -287,6 +287,7 @@ struct tracer {
 >>      struct tracer_flags    *flags;
 >>      bool            print_max;
 >>      bool            use_max_tr;
 >> +    bool            allocated_snapshot;
 >>  };
 >>
 >>
 >
 >




* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-11-30  5:46     ` Hiraku Toyooka
@ 2012-11-30 14:17       ` Steven Rostedt
  2012-12-07  2:07         ` Hiraku Toyooka
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2012-11-30 14:17 UTC (permalink / raw)
  To: Hiraku Toyooka
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

On Fri, 2012-11-30 at 14:46 +0900, Hiraku Toyooka wrote:
> Hi, Steven,
> 
> Thank you for your review.
> 
> (2012/11/16 10:46), Steven Rostedt wrote:
> [snip]
>  > I was thinking about this some more, and I don't like the
>  > snapshot_allocate part. Also, I think the snapshot should not be
>  > allocated by default, and not used until the user explicitly asks for
>  > it.
>  >
>  > Have this:
>  >
>  > ---
>  >  # cat snapshot
>  > Snapshot is not allocated. To allocate it write the ASCII "1" into
>  > this file:
>  >
>  >   echo 1 > snapshot
>  >
>  > This will allocate the buffer for you. To free the snapshot echo "0"
>  > into this file.
>  >
>  >   echo "0" > snapshot
>  >
>  > Anything else will reset the snapshot if it is allocated, or return
>  > EINVAL if it is not allocated.
>  > ---
>  >
> 
> Your idea about "snapshot" is like the following table, isn't it?
> 
>   status\input |     0      |     1      |    else    |
> --------------+------------+------------+------------+
> not allocated |   EINVAL   | alloc+swap |   EINVAL   |
> --------------+------------+------------+------------+
>    allocated   |    free    | clear+swap |   clear    |
> --------------+------------+------------+------------+

Actually, I would have:

  status\input |     0      |     1      |    else    |
 --------------+------------+------------+------------+
 not allocated |(do nothing)| alloc+swap |   EINVAL   |
 --------------+------------+------------+------------+
   allocated   |    free    |   swap     |   clear    |
 --------------+------------+------------+------------+

Perhaps we don't need to do the clear on swap, just let the trace
continue where it left off? But in case we should swap...
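
A minimal sketch of a write-handler body implementing the table above,
reusing the helpers visible in the v2 patch; the switch layout and the use
of update_max_tr() for the swap are assumptions here, not the posted code:

	mutex_lock(&trace_types_lock);
	switch (val) {
	case 0:
		if (current_trace->allocated_snapshot) {
			/* allocated: shrink the spare buffer away */
			ring_buffer_resize(max_tr.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
			set_buffer_entries(&max_tr, 1);
			current_trace->allocated_snapshot = false;
		}
		/* not allocated: do nothing */
		break;
	case 1:
		if (!current_trace->allocated_snapshot) {
			/* not allocated: allocate to match global_trace */
			ret = resize_buffer_duplicate_size(&max_tr,
					&global_trace, RING_BUFFER_ALL_CPUS);
			if (ret < 0)
				break;
			current_trace->allocated_snapshot = true;
		}
		/* swap the live buffer with the (uncleared) spare */
		local_irq_disable();
		update_max_tr(&global_trace, current, smp_processor_id());
		local_irq_enable();
		break;
	default:
		if (!current_trace->allocated_snapshot) {
			ret = -EINVAL;
			break;
		}
		/* allocated + "else": just clear the spare */
		tracing_reset_online_cpus(&max_tr);
		break;
	}
	mutex_unlock(&trace_types_lock);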

There's a fast way to clear the tracer. Look at what the wakeup tracer
does. We can make that generic. If you want, I can write that code up
too. Hmm, maybe I'll do that, as it will speed things up for
everyone :-)


> 
> I think it is almost OK, but there is a problem.
> When we echo "1" to the allocated snapshot, the clear operation adds
> some delay, because the cost of tracing_reset_online_cpus() is
> proportional to the number of CPUs.
> (It takes 72ms in my 8 CPU environment.)
> 
> So, when the snapshot is already cleared by echoing "else" values, we
> can avoid the delay on echoing "1" by keeping a "cleared" status
> internally. For example, we could add a "cleared" flag to struct tracer.
> What do you think about it?
> 
>  >
>  > Also we can add a "trace_snapshot" to the kernel parameters to have it
>  > allocated on boot. But I can add that if you update these patches.
>  >
> 
> OK, I'll update my patches.

This part (kernel parameter) can be a separate patch.
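
(Usage of that boot parameter would presumably just be appending
"trace_snapshot" to the kernel command line, so the spare buffer gets
allocated during boot.)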


>  > Either test here, or better yet, put the test into
>  > tracing_reset_online_cpus().
>  >
>  >     if (!buffer)
>  >         return;
>  >
> 
> I see. I'll add the test to tracing_reset_online_cpus(). Should I make a
> separate patch?

It's a small change, you can add it to your patch or make it separate.
I'll leave that up to you.

> 
> [snip]
 >  >> +static ssize_t tracing_snapshot_read(struct file *filp, char __user *ubuf,
>  >> +                     size_t cnt, loff_t *ppos)
>  >> +{
>  >> +    ssize_t ret = 0;
>  >> +
>  >> +    mutex_lock(&trace_types_lock);
>  >> +    if (current_trace && current_trace->use_max_tr)
>  >> +        ret = -EBUSY;
>  >> +    mutex_unlock(&trace_types_lock);
>  >
>  > I don't like this, as it is racy. The current tracer could change after
 >  > the unlock, and you're back to the problem.
>  >
> 
> You're right...
> This is racy.
> 
>  > Now what we may be able to do, but it would take a little checking for
>  > lock ordering with trace_access_lock() and trace_event_read_lock(), but
>  > we could add the mutex to trace_types_lock to both s_start() and
>  > s_stop() and do the check in s_start() if iter->snapshot is true.
>  >
> 
> If I catch your meaning, do s_start() and s_stop() become like the following
> code?
> (Now, s_start() is used from two files - "trace" and "snapshot", so we
> should change static "old_tracer" to per open-file.)

Actually, let's nuke all the old_tracer usage totally, and instead do:

if (unlikely(strcmp(iter->trace->name, current_trace->name) != 0)) {

You can make this into a separate patch. You can add a check if
current_trace is not NULL too, but I need to fix that, as current_trace
should never be NULL (except for very early boot). But don't worry about
that, I'll handle that.
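
A sketch of what the tracer-copy block in s_start() looks like with that
change, including the current_trace NULL check mentioned above (placement
is assumed, not the final patch):

	/* copy the tracer to avoid using a global lock all around */
	mutex_lock(&trace_types_lock);
	if (unlikely(current_trace &&
		     strcmp(iter->trace->name, current_trace->name) != 0))
		*iter->trace = *current_trace;
	mutex_unlock(&trace_types_lock);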

Or I can write up this patch and send it to you, and you can include it
in your series.

> 
> static void *s_start(struct seq_file *m, loff_t *pos)
> {
>       struct trace_iterator *iter = m->private;
> -    static struct tracer *old_tracer;
> ...
>       /* copy the tracer to avoid using a global lock all around */
>       mutex_lock(&trace_types_lock);
> -    if (unlikely(old_tracer != current_trace && current_trace)) {
> -        old_tracer = current_trace;
> +    if (unlikely(iter->old_tracer != current_trace && current_trace)) {
> +        iter->old_tracer = current_trace;
>           *iter->trace = *current_trace;
>       }
>       mutex_unlock(&trace_types_lock);
> 
> +    if (iter->snapshot && iter->trace->use_max_tr)
> +        return ERR_PTR(-EBUSY);
> +
> ...
> }
> 
> static void s_stop(struct seq_file *m, void *p)
> {
>       struct trace_iterator *iter = m->private;
> 
> +    if (iter->snapshot && iter->trace->use_max_tr)
> +        return;

This part shouldn't be needed, as if s_start() fails it won't call
s_stop(). But if you are paranoid (like I am ;-) then we can do:

	if (WARN_ON_ONCE(iter->snapshot && iter->trace->use_max_tr))
		return;



-- Steve



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-11-30 14:17       ` Steven Rostedt
@ 2012-12-07  2:07         ` Hiraku Toyooka
  2012-12-08  1:00           ` Steven Rostedt
  2012-12-14 17:08           ` Steven Rostedt
  0 siblings, 2 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-12-07  2:07 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

Hi, Steven,

(2012/11/30 23:17), Steven Rostedt wrote:
[snip]
 >
 > Actually, I would have:
 >
 >   status\input |     0      |     1      |    else    |
 >  --------------+------------+------------+------------+
 >  not allocated |(do nothing)| alloc+swap |   EINVAL   |
 >  --------------+------------+------------+------------+
 >    allocated   |    free    |   swap     |   clear    |
 >  --------------+------------+------------+------------+
 >
 > Perhaps we don't need to do the clear on swap, just let the trace
 > continue where it left off? But in case we should swap...
 >

I think we don't need the clear on swap either.
I'll update my patches to match this table.

 > There's a fast way to clear the tracer. Look at what the wakeup tracer
 > does. We can make that generic. If you want, I can write that code up
 > too. Hmm, maybe I'll do that, as it will speed things up for
 > everyone :-)
 >

(I looked over the wakeup tracer, but I couldn't find that code...)

 >
 >> I think it is almost OK, but there is a problem.
 >> When we echo "1" to the allocated snapshot, the clear operation adds
 >> some delay, because the cost of tracing_reset_online_cpus() is
 >> proportional to the number of CPUs.
 >> (It takes 72ms in my 8 CPU environment.)
 >>
 >> So, when the snapshot is already cleared by echoing "else" values, we
 >> can avoid the delay on echoing "1" by keeping a "cleared" status
 >> internally. For example, we could add a "cleared" flag to struct tracer.
 >> What do you think about it?
 >>
 >>  >
 >>  > Also we can add a "trace_snapshot" to the kernel parameters to have it
 >>  > allocated on boot. But I can add that if you update these patches.
 >>  >
 >>
 >> OK, I'll update my patches.
 >
 > This part (kernel parameter) can be a separate patch.
 >

Yes.

 >
 >>  > Either test here, or better yet, put the test into
 >>  > tracing_reset_online_cpus().
 >>  >
 >>  >     if (!buffer)
 >>  >         return;
 >>  >
 >>
 >> I see. I'll add the test to tracing_reset_online_cpus(). Should I make a
 >> separate patch?
 >
 > It's a small change, you can add it to your patch or make it separate.
 > I'll leave that up to you.
 >

I see. Perhaps I'll make it separate.

 >> [snip]
 >>  >> +static ssize_t tracing_snapshot_read(struct file *filp, char __user *ubuf,
 >>  >> +                     size_t cnt, loff_t *ppos)
 >>  >> +{
 >>  >> +    ssize_t ret = 0;
 >>  >> +
 >>  >> +    mutex_lock(&trace_types_lock);
 >>  >> +    if (current_trace && current_trace->use_max_tr)
 >>  >> +        ret = -EBUSY;
 >>  >> +    mutex_unlock(&trace_types_lock);
 >>  >
 >>  > I don't like this, as it is racy. The current tracer could change
 >>  > after the unlock, and you're back to the problem.
 >>  >
 >>
 >> You're right...
 >> This is racy.
 >>
 >>  > Now what we may be able to do, but it would take a little checking for
 >>  > lock ordering with trace_access_lock() and trace_event_read_lock(), but
 >>  > we could add the mutex to trace_types_lock to both s_start() and
 >>  > s_stop() and do the check in s_start() if iter->snapshot is true.
 >>  >
 >>
 >> If I catch your meaning, do s_start() and s_stop() become like the following
 >> code?
 >> (Now, s_start() is used from two files - "trace" and "snapshot", so we
 >> should change static "old_tracer" to per open-file.)
 >
 >  > Actually, let's nuke all the old_tracer usage totally, and instead do:
 >
 > if (unlikely(strcmp(iter->trace->name, current_trace->name) != 0)) {
 >
 > You can make this into a separate patch. You can add a check if
 > current_trace is not NULL too, but I need to fix that, as current_trace
 > should never be NULL (except for very early boot). But don't worry about
 > that, I'll handle that.
 >

OK, I'll change all the old_tracer checks to the strcmp.

 > Or I can write up this patch and send it to you, and you can include it
 > in your series.
 >
 >> static void *s_start(struct seq_file *m, loff_t *pos)
 >> {
 >>       struct trace_iterator *iter = m->private;
 >> -    static struct tracer *old_tracer;
 >> ...
 >>       /* copy the tracer to avoid using a global lock all around */
 >>       mutex_lock(&trace_types_lock);
 >> -    if (unlikely(old_tracer != current_trace && current_trace)) {
 >> -        old_tracer = current_trace;
 >> +    if (unlikely(iter->old_tracer != current_trace && current_trace)) {
 >> +        iter->old_tracer = current_trace;
 >>           *iter->trace = *current_trace;
 >>       }
 >>       mutex_unlock(&trace_types_lock);
 >>
 >> +    if (iter->snapshot && iter->trace->use_max_tr)
 >> +        return ERR_PTR(-EBUSY);
 >> +
 >> ...
 >> }
 >>
 >> static void s_stop(struct seq_file *m, void *p)
 >> {
 >>       struct trace_iterator *iter = m->private;
 >>
 >> +    if (iter->snapshot && iter->trace->use_max_tr)
 >> +        return;
 >
 >  > This part shouldn't be needed, as if s_start() fails it won't call
 >  > s_stop(). But if you are paranoid (like I am ;-) then we can do:
 >  >
 >  >     if (WARN_ON_ONCE(iter->snapshot && iter->trace->use_max_tr))
 >  >         return;

I think that seq_read() calls s_stop() even if s_start() failed.

seq_read()@fs/seq_file.c:

         p = m->op->start(m, &pos);
         while (1) {
                 err = PTR_ERR(p);
                 if (!p || IS_ERR(p))
                         break;
                 ...
         }
         m->op->stop(m, p);

So, I think we need the check in s_stop(), don't we?
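
Put together, a sketch of the guarded s_stop(); the unlock calls in the
body are assumed here from the existing s_stop() in kernel/trace/trace.c,
not taken from the patch:

static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

	/* seq_read() calls ->stop() even when ->start() failed */
	if (WARN_ON_ONCE(iter->snapshot && iter->trace->use_max_tr))
		return;

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}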


Thanks,
Hiraku Toyooka

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-12-07  2:07         ` Hiraku Toyooka
@ 2012-12-08  1:00           ` Steven Rostedt
  2012-12-14 17:08           ` Steven Rostedt
  1 sibling, 0 replies; 13+ messages in thread
From: Steven Rostedt @ 2012-12-08  1:00 UTC (permalink / raw)
  To: Hiraku Toyooka
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

On Fri, 2012-12-07 at 11:07 +0900, Hiraku Toyooka wrote:
> Hi, Steven,
> 
> (2012/11/30 23:17), Steven Rostedt wrote:
> [snip]
>  >
>  > Actually, I would have:
>  >
>  >   status\input |     0      |     1      |    else    |
>  >  --------------+------------+------------+------------+
>  >  not allocated |(do nothing)| alloc+swap |   EINVAL   |
>  >  --------------+------------+------------+------------+
>  >    allocated   |    free    |   swap     |   clear    |
>  >  --------------+------------+------------+------------+
>  >
>  > Perhaps we don't need to do the clear on swap, just let the trace
>  > continue where it left off? But in case we should swap...
>  >
> 
> I think we don't need the clear on swap either.
> I'll update my patches to match this table.
> 
>  > There's a fast way to clear the tracer. Look at what the wakeup tracer
>  > does. We can make that generic. If you want, I can write that code up
>  > too. Hmm, maybe I'll do that, as it will speed things up for
>  > everyone :-)
>  >
> 
> (I looked over the wakeup tracer, but I couldn't find that code...)

Heh, sorry, you needed to look at the "update_max_tr()" in
kernel/trace/trace.c, where we update the time_start value. Then the
output skips all timestamps before that start. This is much more
efficient than a 'reset', as we don't need to sync or anything. Just
record the timestamp of where we want to consider the buffer started,
and ignore any event before that.
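
In code, the generic version of that fast clear could be as small as this
(a sketch; the function name is hypothetical, and it assumes the read path
compares each event's timestamp against tr->time_start, as described above):

static void tracing_fast_reset(struct trace_array *tr)
{
	/* events stamped before time_start are skipped on output */
	tr->time_start = ftrace_now(raw_smp_processor_id());
}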


> I think that seq_read() calls s_stop() even if s_start() failed.
> 
> seq_read()@fs/seq_file.c:
> 
>          p = m->op->start(m, &pos);
>          while (1) {
>                  err = PTR_ERR(p);
>                  if (!p || IS_ERR(p))
>                          break;
>                  ...
>          }
>          m->op->stop(m, p);
> 
> So, I think we need the check in s_stop(), don't we?

Crap, you're right. Hmm, why was I thinking that it didn't? I'd better go
and review some of my recent code to make sure that I didn't have that
wrong assumption.

-- Steve



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-12-07  2:07         ` Hiraku Toyooka
  2012-12-08  1:00           ` Steven Rostedt
@ 2012-12-14 17:08           ` Steven Rostedt
  2012-12-17 10:50             ` Hiraku Toyooka
  1 sibling, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2012-12-14 17:08 UTC (permalink / raw)
  To: Hiraku Toyooka
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

On Fri, 2012-12-07 at 11:07 +0900, Hiraku Toyooka wrote:
> Hi, Steven,
> 
> (2012/11/30 23:17), Steven Rostedt wrote:
> [snip]
>  >
>  > Actually, I would have:
>  >
>  >   status\input |     0      |     1      |    else    |
>  >  --------------+------------+------------+------------+
>  >  not allocated |(do nothing)| alloc+swap |   EINVAL   |
>  >  --------------+------------+------------+------------+
>  >    allocated   |    free    |   swap     |   clear    |
>  >  --------------+------------+------------+------------+
>  >
>  > Perhaps we don't need to do the clear on swap, just let the trace
>  > continue where it left off? But in case we should swap...
>  >
> 
> I think we don't need the clear on swap either.
> I'll update my patches to match this table.
> 
>  > There's a fast way to clear the tracer. Look at what the wakeup tracer
>  > does. We can make that generic. If you want, I can write that code up
>  > too. Hmm, maybe I'll do that, as it will speed things up for
>  > everyone :-)
>  >
> 

BTW, any update on this? I'd really like to get this into 3.9.

Thanks!

-- Steve



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace
  2012-12-14 17:08           ` Steven Rostedt
@ 2012-12-17 10:50             ` Hiraku Toyooka
  0 siblings, 0 replies; 13+ messages in thread
From: Hiraku Toyooka @ 2012-12-17 10:50 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: yrl.pp-manager.tt, linux-kernel, Frederic Weisbecker,
	Ingo Molnar, Jiri Olsa, Li Zefan

(12/15/2012 02:08 AM), Steven Rostedt wrote:
> On Fri, 2012-12-07 at 11:07 +0900, Hiraku Toyooka wrote:
>> Hi, Steven,
>>
>> (2012/11/30 23:17), Steven Rostedt wrote:
>> [snip]
>>   >
>>   > Actually, I would have:
>>   >
>>   >   status\input |     0      |     1      |    else    |
>>   >  --------------+------------+------------+------------+
>>   >  not allocated |(do nothing)| alloc+swap |   EINVAL   |
>>   >  --------------+------------+------------+------------+
>>   >    allocated   |    free    |   swap     |   clear    |
>>   >  --------------+------------+------------+------------+
>>   >
>>   > Perhaps we don't need to do the clear on swap, just let the trace
>>   > continue where it left off? But in case we should swap...
>>   >
>>
>> I think we don't need the clear on swap either.
>> I'll update my patches to match this table.
>>
>>   > There's a fast way to clear the tracer. Look at what the wakeup tracer
>>   > does. We can make that generic. If you want, I can write that code up
>>   > too. Hmm, maybe I'll do that, as it will speed things up for
>>   > everyone :-)
>>   >
>>
>
> BTW, any update on this? I really like to get this into 3.9.
>

I'm applying your review comments and rebasing now, so I'll post the
updated patch series in a few days.

Thanks,
Hiraku Toyooka


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-12-17 10:52 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-17  2:56 [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
2012-10-17  2:56 ` [PATCH v2 -tip 1/4] tracing: change tracer's integer flags to bool Hiraku Toyooka
2012-10-17  2:56 ` [PATCH v2 -tip 2/4] tracing: add a resize function for making one buffer equivalent to the other buffer Hiraku Toyooka
2012-10-17  2:56 ` [PATCH v2 -tip 3/4] tracing: make a snapshot feature available from userspace Hiraku Toyooka
2012-11-16  1:46   ` Steven Rostedt
2012-11-30  5:46     ` Hiraku Toyooka
2012-11-30 14:17       ` Steven Rostedt
2012-12-07  2:07         ` Hiraku Toyooka
2012-12-08  1:00           ` Steven Rostedt
2012-12-14 17:08           ` Steven Rostedt
2012-12-17 10:50             ` Hiraku Toyooka
2012-10-17  2:56 ` [PATCH v2 -tip 4/4] tracing: add description of snapshot to Documentation/trace/ftrace.txt Hiraku Toyooka
2012-11-16  0:16 ` [PATCH v2 -tip 0/4] tracing: make a snapshot feature available from userspace Steven Rostedt
