linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] new irq tracer
@ 2009-02-18 19:53 Jason Baron
  2009-02-18 20:08 ` Steven Rostedt
                   ` (3 more replies)
  0 siblings, 4 replies; 48+ messages in thread
From: Jason Baron @ 2009-02-18 19:53 UTC (permalink / raw)
  To: mingo, rostedt; +Cc: linux-kernel, acme, fweisbec, fche, peterz, compudj

hi,

Using a copule of tracepoints i've implemented an 'irq tracer' which has
the following column format:

[time][irq][action][handled][cpu][length]

for example on my system:

[ 1038.927248381]     28     eth0      1        3     0.000002745
[ 1038.927544688]     28     eth0      1        3     0.000002520
[ 1038.927593609]     28     eth0      1        3     0.000002509
[ 1038.974087675]     29     ahci      1        1     0.000013135
[ 1038.974288475]     29     ahci      1        1     0.000005542
[ 1038.974414324]     29     ahci      1        1     0.000007953

I think this patch is useful in understanding irq system behavior, and
for tracking down irq driver handlers that are firing too often or
spending too much time handling an irq. This approach could be extended
to the timer irq and for softirqs...

thanks,

-Jason

Signed-off-by: Jason Baron <jbaron@redhat.com>


---

 include/trace/irq.h      |   24 +++++++
 kernel/irq/handle.c      |    6 ++
 kernel/trace/Kconfig     |    7 ++
 kernel/trace/Makefile    |    1 
 kernel/trace/trace.h     |    8 ++
 kernel/trace/trace_irq.c |  151 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 197 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/irq.h
 create mode 100644 kernel/trace/trace_irq.c


diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 0000000..e4669c2
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,24 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+struct irq_trace {
+	int retval;
+	int irq;
+	const char *name;
+	int cpu;
+	int handled;
+	struct timespec duration;
+	struct timespec start_time;
+};
+
+DECLARE_TRACE(irq_entry,
+	TPPROTO(unsigned int id),
+	TPARGS(id));
+DECLARE_TRACE(irq_exit,
+	TPPROTO(unsigned int id, irqreturn_t retval),
+	TPARGS(id, retval));
+
+#endif /* _TRACE_IRQ_H */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fdff380..34053b5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,7 @@
 #include <linux/rculist.h>
 #include <linux/hash.h>
 #include <linux/bootmem.h>
+#include <trace/irq.h>
 
 #include "internals.h"
 
@@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+DEFINE_TRACE(irq_entry);
+DEFINE_TRACE(irq_exit);
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		local_irq_enable_in_hardirq();
 
 	do {
+		trace_irq_entry(irq);
 		ret = action->handler(irq, action->dev_id);
+		trace_irq_exit(irq, ret);
 		if (ret == IRQ_HANDLED)
 			status |= action->flags;
 		retval |= ret;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 620f9cd..f62165c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -239,6 +239,13 @@ config POWER_TRACER
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config IRQ_TRACER
+	bool "Trace irq latencies"
+	depends on DEBUG_KERNEL
+	depends on X86
+	select TRACING
+	help
+	  This tracer helps developers anaylze irq latencies.
 
 config STACK_TRACER
 	bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 627090b..58b9f7e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dbff020..6a34aaa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
 #include <trace/boot.h>
 #include <trace/kmemtrace.h>
 #include <trace/power.h>
+#include <trace/irq.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -33,6 +34,7 @@ enum trace_type {
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_POWER,
+	TRACE_IRQ,
 	TRACE_BLK,
 
 	__TRACE_LAST_TYPE,
@@ -173,6 +175,11 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+struct trace_irq {
+	struct trace_entry	ent;
+	struct irq_trace	irq_data;
+};
+
 struct kmemtrace_alloc_entry {
 	struct trace_entry	ent;
 	enum kmemtrace_type_id type_id;
@@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
  		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c
new file mode 100644
index 0000000..1808316
--- /dev/null
+++ b/kernel/trace/trace_irq.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <trace/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+static struct trace_array *irq_trace;
+static int __read_mostly trace_irq_enabled;
+static ktime_t start_times[NR_IRQS];
+
+static void probe_irq_entry(unsigned int irq)
+{
+	if (!trace_irq_enabled)
+		return;
+	start_times[irq] = ktime_get();
+}
+
+static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
+{
+	struct ring_buffer_event *event;
+	struct trace_irq *entry;
+	struct trace_array *tr = irq_trace;
+	struct irq_desc *desc;
+	ktime_t end_time;
+
+	if (!trace_irq_enabled)
+		return;
+	end_time = ktime_get();
+	desc = irq_to_desc(irq);
+	event = trace_buffer_lock_reserve(tr, TRACE_IRQ,
+					  sizeof(*entry), 0, 0);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	entry->irq_data.retval = retval;
+	entry->irq_data.name = desc->action->name;
+	entry->irq_data.irq = irq;
+	entry->irq_data.cpu = raw_smp_processor_id();
+	if (retval == IRQ_HANDLED)
+		entry->irq_data.handled = 1;
+	else
+		entry->irq_data.handled = 0;
+	entry->irq_data.duration =
+		ktime_to_timespec(ktime_sub(end_time, start_times[irq]));
+	entry->irq_data.start_time = ktime_to_timespec(start_times[irq]);
+	trace_buffer_unlock_commit(tr, event, 0, 0);
+ out:
+	preempt_enable();
+}
+
+static int tracing_irq_register(void)
+{
+	int ret;
+
+	ret = register_trace_irq_entry(probe_irq_entry);
+	if (ret) {
+		pr_info("irq trace: irq entry tracepoint failed to register\n");
+		return ret;
+	}
+	ret = register_trace_irq_exit(probe_irq_exit);
+	if (ret) {
+		pr_info("irq trace: irq exit tracepoint failed to register\n");
+		goto fail_exit;
+	}
+	return ret;
+fail_exit:
+	unregister_trace_irq_entry(probe_irq_entry);
+	return ret;
+}
+
+static void start_irq_trace(struct trace_array *tr)
+{
+	trace_irq_enabled = 1;
+	tracing_irq_register();
+}
+
+static void stop_irq_trace(struct trace_array *tr)
+{
+	trace_irq_enabled = 0;
+	unregister_trace_irq_entry(probe_irq_entry);
+	unregister_trace_irq_exit(probe_irq_exit);
+}
+
+
+static int irq_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	irq_trace = tr;
+
+	trace_irq_enabled = 1;
+	tracing_irq_register();
+	for_each_cpu(cpu, cpu_possible_mask)
+		tracing_reset(tr, cpu);
+	return 0;
+}
+
+static enum print_line_t irq_print_line(struct trace_iterator *iter)
+{
+	int ret = 0;
+	struct trace_entry *entry = iter->ent;
+	struct trace_irq *field;
+	struct irq_trace *it;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, entry);
+	it = &field->irq_data;
+	if (entry->type == TRACE_IRQ) {
+		ret = trace_seq_printf(s, "[%5ld.%09ld]     %d     %s      %d        %d     %ld.%09ld\n",
+					it->start_time.tv_sec,
+					it->start_time.tv_nsec,
+					it->irq, it->name, it->handled, it->cpu,
+					it->duration.tv_sec,
+					it->duration.tv_nsec);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_HANDLED;
+	}
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static void irq_print_headers(struct seq_file *s)
+{
+	seq_printf(s, "      [time]         [irq] [action] [handled] [cpu] [length]\n");
+	seq_printf(s, "         |             |      |        |        |       |   \n\n");
+}
+
+static struct tracer irq_tracer __read_mostly =
+{
+	.name		= "irq",
+	.init		= irq_trace_init,
+	.start		= start_irq_trace,
+	.stop		= stop_irq_trace,
+	.reset		= stop_irq_trace,
+	.print_header	= irq_print_headers,
+	.print_line	= irq_print_line,
+};
+
+static int init_irq_trace(void)
+{
+	return register_tracer(&irq_tracer);
+}
+device_initcall(init_irq_trace);


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 19:53 [PATCH] new irq tracer Jason Baron
@ 2009-02-18 20:08 ` Steven Rostedt
  2009-02-18 20:26 ` Mathieu Desnoyers
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 48+ messages in thread
From: Steven Rostedt @ 2009-02-18 20:08 UTC (permalink / raw)
  To: Jason Baron; +Cc: mingo, linux-kernel, acme, fweisbec, fche, peterz, compudj


On Wed, 18 Feb 2009, Jason Baron wrote:
> hi,
> 
> Using a copule of tracepoints i've implemented an 'irq tracer' which has
> the following column format:
> 
> [time][irq][action][handled][cpu][length]
> 
> for example on my system:
> 
> [ 1038.927248381]     28     eth0      1        3     0.000002745
> [ 1038.927544688]     28     eth0      1        3     0.000002520
> [ 1038.927593609]     28     eth0      1        3     0.000002509
> [ 1038.974087675]     29     ahci      1        1     0.000013135
> [ 1038.974288475]     29     ahci      1        1     0.000005542
> [ 1038.974414324]     29     ahci      1        1     0.000007953
> 
> I think this patch is useful in understanding irq system behavior, and
> for tracking down irq driver handlers that are firing too often or
> spending too much time handling an irq. This approach could be extended
> to the timer irq and for softirqs...
> 
> thanks,
> 
> -Jason
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> 
> 
> ---
> 
>  include/trace/irq.h      |   24 +++++++
>  kernel/irq/handle.c      |    6 ++
>  kernel/trace/Kconfig     |    7 ++
>  kernel/trace/Makefile    |    1 
>  kernel/trace/trace.h     |    8 ++
>  kernel/trace/trace_irq.c |  151 ++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 197 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/irq.h
>  create mode 100644 kernel/trace/trace_irq.c
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..e4669c2
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,24 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +struct irq_trace {
> +	int retval;
> +	int irq;
> +	const char *name;
> +	int cpu;

You can get rid of "cpu" and probably even name.

> +	int handled;
> +	struct timespec duration;
> +	struct timespec start_time;
> +};
> +
> +DECLARE_TRACE(irq_entry,
> +	TPPROTO(unsigned int id),
> +	TPARGS(id));
> +DECLARE_TRACE(irq_exit,
> +	TPPROTO(unsigned int id, irqreturn_t retval),
> +	TPARGS(id, retval));
> +
> +#endif /* _TRACE_IRQ_H */
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..34053b5 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_entry);
> +DEFINE_TRACE(irq_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_entry(irq);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_exit(irq, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 620f9cd..f62165c 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -239,6 +239,13 @@ config POWER_TRACER
>  	  power management decisions, specifically the C-state and P-state
>  	  behavior.
>  
> +config IRQ_TRACER
> +	bool "Trace irq latencies"
> +	depends on DEBUG_KERNEL
> +	depends on X86
> +	select TRACING
> +	help
> +	  This tracer helps developers anaylze irq latencies.
>  
>  config STACK_TRACER
>  	bool "Trace max stack"
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 627090b..58b9f7e 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
>  obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
>  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
>  obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
> +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
>  
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index dbff020..6a34aaa 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -11,6 +11,7 @@
>  #include <trace/boot.h>
>  #include <trace/kmemtrace.h>
>  #include <trace/power.h>
> +#include <trace/irq.h>
>  
>  enum trace_type {
>  	__TRACE_FIRST_TYPE = 0,
> @@ -33,6 +34,7 @@ enum trace_type {
>  	TRACE_KMEM_ALLOC,
>  	TRACE_KMEM_FREE,
>  	TRACE_POWER,
> +	TRACE_IRQ,
>  	TRACE_BLK,
>  
>  	__TRACE_LAST_TYPE,
> @@ -173,6 +175,11 @@ struct trace_power {
>  	struct power_trace	state_data;
>  };
>  
> +struct trace_irq {
> +	struct trace_entry	ent;
> +	struct irq_trace	irq_data;
> +};
> +
>  struct kmemtrace_alloc_entry {
>  	struct trace_entry	ent;
>  	enum kmemtrace_type_id type_id;
> @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
>  			  TRACE_GRAPH_RET);		\
>  		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
>   		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
> +		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
>  			  TRACE_KMEM_ALLOC);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
> diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c
> new file mode 100644
> index 0000000..1808316
> --- /dev/null
> +++ b/kernel/trace/trace_irq.c
> @@ -0,0 +1,151 @@
> +/*
> + * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
> + */
> +
> +#include <linux/init.h>
> +#include <linux/debugfs.h>
> +#include <trace/irq.h>
> +#include <linux/kallsyms.h>
> +#include <linux/module.h>
> +#include <linux/interrupt.h>
> +#include <linux/ktime.h>
> +
> +#include "trace.h"
> +#include "trace_output.h"
> +
> +static struct trace_array *irq_trace;
> +static int __read_mostly trace_irq_enabled;
> +static ktime_t start_times[NR_IRQS];
> +
> +static void probe_irq_entry(unsigned int irq)
> +{
> +	if (!trace_irq_enabled)
> +		return;
> +	start_times[irq] = ktime_get();
> +}
> +
> +static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
> +{
> +	struct ring_buffer_event *event;
> +	struct trace_irq *entry;
> +	struct trace_array *tr = irq_trace;
> +	struct irq_desc *desc;
> +	ktime_t end_time;
> +
> +	if (!trace_irq_enabled)
> +		return;
> +	end_time = ktime_get();
> +	desc = irq_to_desc(irq);
> +	event = trace_buffer_lock_reserve(tr, TRACE_IRQ,
> +					  sizeof(*entry), 0, 0);
> +	if (!event)
> +		goto out;
> +	entry	= ring_buffer_event_data(event);
> +	entry->irq_data.retval = retval;
> +	entry->irq_data.name = desc->action->name;

Can't you get the name information at output time?

> +	entry->irq_data.irq = irq;
> +	entry->irq_data.cpu = raw_smp_processor_id();

The CPU is held via which ring buffer is used. This information is passed 
to the tracer via iter->cpu.

> +	if (retval == IRQ_HANDLED)
> +		entry->irq_data.handled = 1;
> +	else
> +		entry->irq_data.handled = 0;

What about:

	entry->irq_data.handled = retval == IRQ_HANDLED;

> +	entry->irq_data.duration =
> +		ktime_to_timespec(ktime_sub(end_time, start_times[irq]));
> +	entry->irq_data.start_time = ktime_to_timespec(start_times[irq]);
> +	trace_buffer_unlock_commit(tr, event, 0, 0);
> + out:
> +	preempt_enable();

Where's the matching preempt_disable()? And since we are being called from
an interrupt handler, I doubt it is even needed.

> +}
> +
> +static int tracing_irq_register(void)
> +{
> +	int ret;
> +
> +	ret = register_trace_irq_entry(probe_irq_entry);
> +	if (ret) {
> +		pr_info("irq trace: irq entry tracepoint failed to register\n");
> +		return ret;
> +	}
> +	ret = register_trace_irq_exit(probe_irq_exit);
> +	if (ret) {
> +		pr_info("irq trace: irq exit tracepoint failed to register\n");
> +		goto fail_exit;
> +	}
> +	return ret;
> +fail_exit:
> +	unregister_trace_irq_entry(probe_irq_entry);
> +	return ret;
> +}
> +
> +static void start_irq_trace(struct trace_array *tr)
> +{
> +	trace_irq_enabled = 1;
> +	tracing_irq_register();
> +}
> +
> +static void stop_irq_trace(struct trace_array *tr)
> +{
> +	trace_irq_enabled = 0;
> +	unregister_trace_irq_entry(probe_irq_entry);
> +	unregister_trace_irq_exit(probe_irq_exit);
> +}
> +
> +
> +static int irq_trace_init(struct trace_array *tr)
> +{
> +	int cpu;
> +	irq_trace = tr;
> +
> +	trace_irq_enabled = 1;
> +	tracing_irq_register();
> +	for_each_cpu(cpu, cpu_possible_mask)
> +		tracing_reset(tr, cpu);
> +	return 0;
> +}
> +
> +static enum print_line_t irq_print_line(struct trace_iterator *iter)
> +{
> +	int ret = 0;
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_irq *field;
> +	struct irq_trace *it;
> +	struct trace_seq *s = &iter->seq;
> +
> +	trace_assign_type(field, entry);
> +	it = &field->irq_data;
> +	if (entry->type == TRACE_IRQ) {
> +		ret = trace_seq_printf(s, "[%5ld.%09ld]     %d     %s      %d        %d     %ld.%09ld\n",
> +					it->start_time.tv_sec,
> +					it->start_time.tv_nsec,
> +					it->irq, it->name, it->handled, it->cpu,

iter->cpu is the same as it->cpu.

-- Steve

> +					it->duration.tv_sec,
> +					it->duration.tv_nsec);
> +		if (!ret)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +		return TRACE_TYPE_HANDLED;
> +	}
> +	return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static void irq_print_headers(struct seq_file *s)
> +{
> +	seq_printf(s, "      [time]         [irq] [action] [handled] [cpu] [length]\n");
> +	seq_printf(s, "         |             |      |        |        |       |   \n\n");
> +}
> +
> +static struct tracer irq_tracer __read_mostly =
> +{
> +	.name		= "irq",
> +	.init		= irq_trace_init,
> +	.start		= start_irq_trace,
> +	.stop		= stop_irq_trace,
> +	.reset		= stop_irq_trace,
> +	.print_header	= irq_print_headers,
> +	.print_line	= irq_print_line,
> +};
> +
> +static int init_irq_trace(void)
> +{
> +	return register_tracer(&irq_tracer);
> +}
> +device_initcall(init_irq_trace);
> 
> 
> 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 19:53 [PATCH] new irq tracer Jason Baron
  2009-02-18 20:08 ` Steven Rostedt
@ 2009-02-18 20:26 ` Mathieu Desnoyers
  2009-02-19  1:42   ` KOSAKI Motohiro
  2009-02-18 20:30 ` Kyle McMartin
  2009-02-18 21:15 ` Peter Zijlstra
  3 siblings, 1 reply; 48+ messages in thread
From: Mathieu Desnoyers @ 2009-02-18 20:26 UTC (permalink / raw)
  To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, peterz

* Jason Baron (jbaron@redhat.com) wrote:
> hi,
> 
> Using a copule of tracepoints i've implemented an 'irq tracer' which has
> the following column format:
> 
> [time][irq][action][handled][cpu][length]
> 
> for example on my system:
> 
> [ 1038.927248381]     28     eth0      1        3     0.000002745
> [ 1038.927544688]     28     eth0      1        3     0.000002520
> [ 1038.927593609]     28     eth0      1        3     0.000002509
> [ 1038.974087675]     29     ahci      1        1     0.000013135
> [ 1038.974288475]     29     ahci      1        1     0.000005542
> [ 1038.974414324]     29     ahci      1        1     0.000007953
> 
> I think this patch is useful in understanding irq system behavior, and
> for tracking down irq driver handlers that are firing too often or
> spending too much time handling an irq. This approach could be extended
> to the timer irq and for softirqs...
> 
> thanks,
> 

Hi Jason,

If you are in the process of rewriting the LTTng instrumentation from
scratch, please have a look at the git tree first, at least as an
inspiration source, otherwise we will end up tring to merge two subtily
different sets of instrumentation into the mainline.

Please see :

http://git.kernel.org/?p=linux/kernel/git/compudj/linux-2.6-lttng.git

Thanks,

Mathieu

> -Jason
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> 
> 
> ---
> 
>  include/trace/irq.h      |   24 +++++++
>  kernel/irq/handle.c      |    6 ++
>  kernel/trace/Kconfig     |    7 ++
>  kernel/trace/Makefile    |    1 
>  kernel/trace/trace.h     |    8 ++
>  kernel/trace/trace_irq.c |  151 ++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 197 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/irq.h
>  create mode 100644 kernel/trace/trace_irq.c
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..e4669c2
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,24 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +struct irq_trace {
> +	int retval;
> +	int irq;
> +	const char *name;
> +	int cpu;
> +	int handled;
> +	struct timespec duration;
> +	struct timespec start_time;
> +};
> +
> +DECLARE_TRACE(irq_entry,
> +	TPPROTO(unsigned int id),
> +	TPARGS(id));
> +DECLARE_TRACE(irq_exit,
> +	TPPROTO(unsigned int id, irqreturn_t retval),
> +	TPARGS(id, retval));
> +
> +#endif /* _TRACE_IRQ_H */
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..34053b5 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_entry);
> +DEFINE_TRACE(irq_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_entry(irq);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_exit(irq, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 620f9cd..f62165c 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -239,6 +239,13 @@ config POWER_TRACER
>  	  power management decisions, specifically the C-state and P-state
>  	  behavior.
>  
> +config IRQ_TRACER
> +	bool "Trace irq latencies"
> +	depends on DEBUG_KERNEL
> +	depends on X86
> +	select TRACING
> +	help
> +	  This tracer helps developers anaylze irq latencies.
>  
>  config STACK_TRACER
>  	bool "Trace max stack"
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 627090b..58b9f7e 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
>  obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
>  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
>  obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
> +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
>  
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index dbff020..6a34aaa 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -11,6 +11,7 @@
>  #include <trace/boot.h>
>  #include <trace/kmemtrace.h>
>  #include <trace/power.h>
> +#include <trace/irq.h>
>  
>  enum trace_type {
>  	__TRACE_FIRST_TYPE = 0,
> @@ -33,6 +34,7 @@ enum trace_type {
>  	TRACE_KMEM_ALLOC,
>  	TRACE_KMEM_FREE,
>  	TRACE_POWER,
> +	TRACE_IRQ,
>  	TRACE_BLK,
>  
>  	__TRACE_LAST_TYPE,
> @@ -173,6 +175,11 @@ struct trace_power {
>  	struct power_trace	state_data;
>  };
>  
> +struct trace_irq {
> +	struct trace_entry	ent;
> +	struct irq_trace	irq_data;
> +};
> +
>  struct kmemtrace_alloc_entry {
>  	struct trace_entry	ent;
>  	enum kmemtrace_type_id type_id;
> @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
>  			  TRACE_GRAPH_RET);		\
>  		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
>   		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
> +		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
>  			  TRACE_KMEM_ALLOC);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
> diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c
> new file mode 100644
> index 0000000..1808316
> --- /dev/null
> +++ b/kernel/trace/trace_irq.c
> @@ -0,0 +1,151 @@
> +/*
> + * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
> + */
> +
> +#include <linux/init.h>
> +#include <linux/debugfs.h>
> +#include <trace/irq.h>
> +#include <linux/kallsyms.h>
> +#include <linux/module.h>
> +#include <linux/interrupt.h>
> +#include <linux/ktime.h>
> +
> +#include "trace.h"
> +#include "trace_output.h"
> +
> +static struct trace_array *irq_trace;
> +static int __read_mostly trace_irq_enabled;
> +static ktime_t start_times[NR_IRQS];
> +
> +static void probe_irq_entry(unsigned int irq)
> +{
> +	if (!trace_irq_enabled)
> +		return;
> +	start_times[irq] = ktime_get();
> +}
> +
> +static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
> +{
> +	struct ring_buffer_event *event;
> +	struct trace_irq *entry;
> +	struct trace_array *tr = irq_trace;
> +	struct irq_desc *desc;
> +	ktime_t end_time;
> +
> +	if (!trace_irq_enabled)
> +		return;
> +	end_time = ktime_get();
> +	desc = irq_to_desc(irq);
> +	event = trace_buffer_lock_reserve(tr, TRACE_IRQ,
> +					  sizeof(*entry), 0, 0);
> +	if (!event)
> +		goto out;
> +	entry	= ring_buffer_event_data(event);
> +	entry->irq_data.retval = retval;
> +	entry->irq_data.name = desc->action->name;
> +	entry->irq_data.irq = irq;
> +	entry->irq_data.cpu = raw_smp_processor_id();
> +	if (retval == IRQ_HANDLED)
> +		entry->irq_data.handled = 1;
> +	else
> +		entry->irq_data.handled = 0;
> +	entry->irq_data.duration =
> +		ktime_to_timespec(ktime_sub(end_time, start_times[irq]));
> +	entry->irq_data.start_time = ktime_to_timespec(start_times[irq]);
> +	trace_buffer_unlock_commit(tr, event, 0, 0);
> + out:
> +	preempt_enable();
> +}
> +
> +static int tracing_irq_register(void)
> +{
> +	int ret;
> +
> +	ret = register_trace_irq_entry(probe_irq_entry);
> +	if (ret) {
> +		pr_info("irq trace: irq entry tracepoint failed to register\n");
> +		return ret;
> +	}
> +	ret = register_trace_irq_exit(probe_irq_exit);
> +	if (ret) {
> +		pr_info("irq trace: irq exit tracepoint failed to register\n");
> +		goto fail_exit;
> +	}
> +	return ret;
> +fail_exit:
> +	unregister_trace_irq_entry(probe_irq_entry);
> +	return ret;
> +}
> +
> +static void start_irq_trace(struct trace_array *tr)
> +{
> +	trace_irq_enabled = 1;
> +	tracing_irq_register();
> +}
> +
> +static void stop_irq_trace(struct trace_array *tr)
> +{
> +	trace_irq_enabled = 0;
> +	unregister_trace_irq_entry(probe_irq_entry);
> +	unregister_trace_irq_exit(probe_irq_exit);
> +}
> +
> +
> +static int irq_trace_init(struct trace_array *tr)
> +{
> +	int cpu;
> +	irq_trace = tr;
> +
> +	trace_irq_enabled = 1;
> +	tracing_irq_register();
> +	for_each_cpu(cpu, cpu_possible_mask)
> +		tracing_reset(tr, cpu);
> +	return 0;
> +}
> +
> +static enum print_line_t irq_print_line(struct trace_iterator *iter)
> +{
> +	int ret = 0;
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_irq *field;
> +	struct irq_trace *it;
> +	struct trace_seq *s = &iter->seq;
> +
> +	trace_assign_type(field, entry);
> +	it = &field->irq_data;
> +	if (entry->type == TRACE_IRQ) {
> +		ret = trace_seq_printf(s, "[%5ld.%09ld]     %d     %s      %d        %d     %ld.%09ld\n",
> +					it->start_time.tv_sec,
> +					it->start_time.tv_nsec,
> +					it->irq, it->name, it->handled, it->cpu,
> +					it->duration.tv_sec,
> +					it->duration.tv_nsec);
> +		if (!ret)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +		return TRACE_TYPE_HANDLED;
> +	}
> +	return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static void irq_print_headers(struct seq_file *s)
> +{
> +	seq_printf(s, "      [time]         [irq] [action] [handled] [cpu] [length]\n");
> +	seq_printf(s, "         |             |      |        |        |       |   \n\n");
> +}
> +
> +static struct tracer irq_tracer __read_mostly =
> +{
> +	.name		= "irq",
> +	.init		= irq_trace_init,
> +	.start		= start_irq_trace,
> +	.stop		= stop_irq_trace,
> +	.reset		= stop_irq_trace,
> +	.print_header	= irq_print_headers,
> +	.print_line	= irq_print_line,
> +};
> +
> +static int init_irq_trace(void)
> +{
> +	return register_tracer(&irq_tracer);
> +}
> +device_initcall(init_irq_trace);
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 19:53 [PATCH] new irq tracer Jason Baron
  2009-02-18 20:08 ` Steven Rostedt
  2009-02-18 20:26 ` Mathieu Desnoyers
@ 2009-02-18 20:30 ` Kyle McMartin
  2009-02-18 21:15 ` Peter Zijlstra
  3 siblings, 0 replies; 48+ messages in thread
From: Kyle McMartin @ 2009-02-18 20:30 UTC (permalink / raw)
  To: Jason Baron
  Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, peterz, compudj

On Wed, Feb 18, 2009 at 02:53:28PM -0500, Jason Baron wrote:
> hi,
> 
> I think this patch is useful in understanding irq system behavior, and
> for tracking down irq driver handlers that are firing too often or
> spending too much time handling an irq. This approach could be extended
> to the timer irq and for softirqs...
> 

Very cool, I had written something similar a week or two ago to debug
something but never had gotten around to posting it. :)

Looks like it works just as well too. Glad to see it.

cheers, Kyle

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 19:53 [PATCH] new irq tracer Jason Baron
                   ` (2 preceding siblings ...)
  2009-02-18 20:30 ` Kyle McMartin
@ 2009-02-18 21:15 ` Peter Zijlstra
  2009-02-18 21:35   ` Jason Baron
  2009-02-19  1:46   ` Frederic Weisbecker
  3 siblings, 2 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-18 21:15 UTC (permalink / raw)
  To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj

On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote:
> hi,
> 
> Using a copule of tracepoints i've implemented an 'irq tracer' which has
> the following column format:
> 
> [time][irq][action][handled][cpu][length]
> 
> for example on my system:
> 
> [ 1038.927248381]     28     eth0      1        3     0.000002745
> [ 1038.927544688]     28     eth0      1        3     0.000002520
> [ 1038.927593609]     28     eth0      1        3     0.000002509
> [ 1038.974087675]     29     ahci      1        1     0.000013135
> [ 1038.974288475]     29     ahci      1        1     0.000005542
> [ 1038.974414324]     29     ahci      1        1     0.000007953
> 
> I think this patch is useful in understanding irq system behavior, and
> for tracking down irq driver handlers that are firing too often or
> spending too much time handling an irq. This approach could be extended
> to the timer irq and for softirqs...

The function graph tracer can already do this, it has a special section
to recognize irq entry function, and already measures time spend.

I guess with Steve's new triggers (probes or whatever they ended up
being called) you could limit it to just IRQ entry points.




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:15 ` Peter Zijlstra
@ 2009-02-18 21:35   ` Jason Baron
  2009-02-18 21:46     ` Peter Zijlstra
  2009-02-19  1:46   ` Frederic Weisbecker
  1 sibling, 1 reply; 48+ messages in thread
From: Jason Baron @ 2009-02-18 21:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj

On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote:
> > hi,
> > 
> > Using a copule of tracepoints i've implemented an 'irq tracer' which has
> > the following column format:
> > 
> > [time][irq][action][handled][cpu][length]
> > 
> > for example on my system:
> > 
> > [ 1038.927248381]     28     eth0      1        3     0.000002745
> > [ 1038.927544688]     28     eth0      1        3     0.000002520
> > [ 1038.927593609]     28     eth0      1        3     0.000002509
> > [ 1038.974087675]     29     ahci      1        1     0.000013135
> > [ 1038.974288475]     29     ahci      1        1     0.000005542
> > [ 1038.974414324]     29     ahci      1        1     0.000007953
> > 
> > I think this patch is useful in understanding irq system behavior, and
> > for tracking down irq driver handlers that are firing too often or
> > spending too much time handling an irq. This approach could be extended
> > to the timer irq and for softirqs...
> 
> The function graph tracer can already do this, it has a special section
> to recognize irq entry function, and already measures time spend.
> 
> I guess with Steve's new triggers (probes or whatever they ended up
> being called) you could limit it to just IRQ entry points.
> 

i did look at the graph tracer first. While it does measure irq related
functions, it does not give this level of detail concerning which irq #,
which irq handler is involved, and whether or not the irq was handled
successfully or not. So I believe this tracepoints add a level of detail
that the graph tracer does not have. Furthermore, this patch requires 2
tracepoints, not instrumentation for all kernel functions.

thanks,

-Jason




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:35   ` Jason Baron
@ 2009-02-18 21:46     ` Peter Zijlstra
  2009-02-18 22:02       ` Frank Ch. Eigler
                         ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-18 21:46 UTC (permalink / raw)
  To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj

On Wed, 2009-02-18 at 16:35 -0500, Jason Baron wrote:
> On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote:
> > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote:
> > > hi,
> > > 
> > > Using a copule of tracepoints i've implemented an 'irq tracer' which has
> > > the following column format:
> > > 
> > > [time][irq][action][handled][cpu][length]
> > > 
> > > for example on my system:
> > > 
> > > [ 1038.927248381]     28     eth0      1        3     0.000002745
> > > [ 1038.927544688]     28     eth0      1        3     0.000002520
> > > [ 1038.927593609]     28     eth0      1        3     0.000002509
> > > [ 1038.974087675]     29     ahci      1        1     0.000013135
> > > [ 1038.974288475]     29     ahci      1        1     0.000005542
> > > [ 1038.974414324]     29     ahci      1        1     0.000007953
> > > 
> > > I think this patch is useful in understanding irq system behavior, and
> > > for tracking down irq driver handlers that are firing too often or
> > > spending too much time handling an irq. This approach could be extended
> > > to the timer irq and for softirqs...
> > 
> > The function graph tracer can already do this, it has a special section
> > to recognize irq entry function, and already measures time spend.
> > 
> > I guess with Steve's new triggers (probes or whatever they ended up
> > being called) you could limit it to just IRQ entry points.
> > 
> 
> i did look at the graph tracer first. While it does measure irq related
> functions, it does not give this level of detail concerning which irq #,
> which irq handler is involved, and whether or not the irq was handled
> successfully or not. So I believe this tracepoints add a level of detail
> that the graph tracer does not have. Furthermore, this patch requires 2
> tracepoints, not instrumentation for all kernel functions.

How useful is that return value?

Much of the other data is already available, /proc/interrupts will
happily tell you the source of your interrupt storm. The irq-off latency
tracer will tell you if stuff takes too much time, the graph tracer can
tell you what is taking how much time.

I really am having a difficult time seeing the use in such narrow
tracers.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:46     ` Peter Zijlstra
@ 2009-02-18 22:02       ` Frank Ch. Eigler
  2009-02-18 22:10         ` Peter Zijlstra
  2009-02-18 23:34       ` Kyle McMartin
  2009-02-19  2:13       ` Frederic Weisbecker
  2 siblings, 1 reply; 48+ messages in thread
From: Frank Ch. Eigler @ 2009-02-18 22:02 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj

Hi -

On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote:
> [...]
> How useful is that return value?

"somewhat"?

> Much of the other data is already available, /proc/interrupts will
> happily tell you the source of your interrupt storm. The irq-off
> latency tracer will tell you if stuff takes too much time, the graph
> tracer can tell you what is taking how much time.

Doesn't it seem like this is too much work to have answer a simple if
particular question?

> I really am having a difficult time seeing the use in such narrow
> tracers.

Part of the problem may come from defining "tracers" as something
limited to ftrace engines.  Once such tracepoints are in the kernel,
more powerful analytical tools may be attached to them.


- FChE

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 22:02       ` Frank Ch. Eigler
@ 2009-02-18 22:10         ` Peter Zijlstra
  2009-02-18 22:23           ` Frank Ch. Eigler
  2009-02-20 19:52           ` Jason Baron
  0 siblings, 2 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-18 22:10 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj

On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote:

> > I really am having a difficult time seeing the use in such narrow
> > tracers.
> 
> Part of the problem may come from defining "tracers" as something
> limited to ftrace engines.  Once such tracepoints are in the kernel,
> more powerful analytical tools may be attached to them.

ftrace graph tracer is by far the most powerful thing I've seen, there's
nothing limiting about ftrace.

What is limiting are these puny little tracers that have no real value.

A much better purpose for these tracepoints would be augmenting data in
existing tracers like the graph/function/sched tracer.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 22:10         ` Peter Zijlstra
@ 2009-02-18 22:23           ` Frank Ch. Eigler
  2009-02-18 23:21             ` Peter Zijlstra
  2009-02-20 19:52           ` Jason Baron
  1 sibling, 1 reply; 48+ messages in thread
From: Frank Ch. Eigler @ 2009-02-18 22:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj

Hi -

On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> > > I really am having a difficult time seeing the use in such narrow
> > > tracers.
> > 
> > Part of the problem may come from defining "tracers" as something
> > limited to ftrace engines.  Once such tracepoints are in the kernel,
> > more powerful analytical tools may be attached to them.
> 
> ftrace graph tracer is by far the most powerful thing I've seen [...]

Be that as it may, what you suggested required separate correlation of
data with /proc/interrupts contents.


> What is limiting are these puny little tracers that have no real value.

Which limited resource would even puny tracers exhaust?


> A much better purpose for these tracepoints would be augmenting data in
> existing tracers like the graph/function/sched tracer.

Be more specific.  How would you augment those tracers with e.g.
individual irq numbers, their disposition status (HANDLED etc.).


- FChE

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 22:23           ` Frank Ch. Eigler
@ 2009-02-18 23:21             ` Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-18 23:21 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj

On Wed, 2009-02-18 at 17:23 -0500, Frank Ch. Eigler wrote:
> Hi -
> 
> On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> > > > I really am having a difficult time seeing the use in such narrow
> > > > tracers.
> > > 
> > > Part of the problem may come from defining "tracers" as something
> > > limited to ftrace engines.  Once such tracepoints are in the kernel,
> > > more powerful analytical tools may be attached to them.
> > 
> > ftrace graph tracer is by far the most powerful thing I've seen [...]
> 
> Be that as it may, what you suggested required separate correlation of
> data with /proc/interrupts contents.

That was to illustrate that the proposed tracer doesn't add much value.
Its almost 200 lines of code that needs maintenance to provide
information that's mostly already available, seems like the wrong
tradeoff to me.

> > What is limiting are these puny little tracers that have no real value.
> 
> Which limited resource would even puny tracers exhaust?

Usability and maintenance. But loss of the bigger picture is the main
complaint.

> > A much better purpose for these tracepoints would be augmenting data in
> > existing tracers like the graph/function/sched tracer.
> 
> Be more specific.  How would you augment those tracers with e.g.
> individual irq numbers, their disposition status (HANDLED etc.).

With a mixture of creativity and code ;-)

Perhaps by creating a way to provide argument and return values to the
function call data, and registering tracepoints to obtain these.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:46     ` Peter Zijlstra
  2009-02-18 22:02       ` Frank Ch. Eigler
@ 2009-02-18 23:34       ` Kyle McMartin
  2009-02-19  2:13       ` Frederic Weisbecker
  2 siblings, 0 replies; 48+ messages in thread
From: Kyle McMartin @ 2009-02-18 23:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj

On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote:
> Much of the other data is already available, /proc/interrupts will
> happily tell you the source of your interrupt storm. The irq-off latency
> tracer will tell you if stuff takes too much time, the graph tracer can
> tell you what is taking how much time.
> 

How so? It will only tell you which irq line you need to be looking at,
not which handlers device is spewing the IRQs...

Perhaps I'm missing something obvious, but that's what I've used a
similar instrument for.

regards, Kyle

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 20:26 ` Mathieu Desnoyers
@ 2009-02-19  1:42   ` KOSAKI Motohiro
  0 siblings, 0 replies; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-19  1:42 UTC (permalink / raw)
  To: Mathieu Desnoyers, Jason Baron
  Cc: kosaki.motohiro, mingo, rostedt, linux-kernel, acme, fweisbec,
	fche, peterz

Hi Jason,

> * Jason Baron (jbaron@redhat.com) wrote:
> > hi,
> > 
> > Using a copule of tracepoints i've implemented an 'irq tracer' which has
> > the following column format:
> > 
> > [time][irq][action][handled][cpu][length]
> > 
> > for example on my system:
> > 
> > [ 1038.927248381]     28     eth0      1        3     0.000002745
> > [ 1038.927544688]     28     eth0      1        3     0.000002520
> > [ 1038.927593609]     28     eth0      1        3     0.000002509
> > [ 1038.974087675]     29     ahci      1        1     0.000013135
> > [ 1038.974288475]     29     ahci      1        1     0.000005542
> > [ 1038.974414324]     29     ahci      1        1     0.000007953
> > 
> > I think this patch is useful in understanding irq system behavior, and
> > for tracking down irq driver handlers that are firing too often or
> > spending too much time handling an irq. This approach could be extended
> > to the timer irq and for softirqs...
> > 
> > thanks,



> > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> >  		local_irq_enable_in_hardirq();
> >  
> >  	do {
> > +		trace_irq_entry(irq);
> >  		ret = action->handler(irq, action->dev_id);
> > +		trace_irq_exit(irq, ret);
> >  		if (ret == IRQ_HANDLED)
> >  			status |= action->flags;
> >  		retval |= ret;

May I ask why you select this line?

IOW, I guess your probe point is interest to irq action and mathieu's 
probe point is interest to interrupt itself.

I guess you also want to observe shared irq problem, right?



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:15 ` Peter Zijlstra
  2009-02-18 21:35   ` Jason Baron
@ 2009-02-19  1:46   ` Frederic Weisbecker
  1 sibling, 0 replies; 48+ messages in thread
From: Frederic Weisbecker @ 2009-02-19  1:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fche, compudj

On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote:
> > hi,
> > 
> > Using a copule of tracepoints i've implemented an 'irq tracer' which has
> > the following column format:
> > 
> > [time][irq][action][handled][cpu][length]
> > 
> > for example on my system:
> > 
> > [ 1038.927248381]     28     eth0      1        3     0.000002745
> > [ 1038.927544688]     28     eth0      1        3     0.000002520
> > [ 1038.927593609]     28     eth0      1        3     0.000002509
> > [ 1038.974087675]     29     ahci      1        1     0.000013135
> > [ 1038.974288475]     29     ahci      1        1     0.000005542
> > [ 1038.974414324]     29     ahci      1        1     0.000007953
> > 
> > I think this patch is useful in understanding irq system behavior, and
> > for tracking down irq driver handlers that are firing too often or
> > spending too much time handling an irq. This approach could be extended
> > to the timer irq and for softirqs...
> 
> The function graph tracer can already do this, it has a special section
> to recognize irq entry function, and already measures time spend.


Right, except that it doesn't trace the irq number, action and the return value,
though the latter will be handled one day (probably soon).
 

> I guess with Steve's new triggers (probes or whatever they ended up
> being called) you could limit it to just IRQ entry points.


We already could trace only irq function call tree through set_graph_function:
echo smp_apic_timer_interrupt > set_graph_function
echo do_IRQ >> set_graph_function

But you will see the whole call graph:

  321.913057 |   1)               |      do_IRQ() {
  321.913058 |   1)   0.571 us    |        exit_idle();
  321.913059 |   1)               |        irq_enter() {
  321.913059 |   1)   0.548 us    |          rcu_irq_enter();
  321.913060 |   1)   0.533 us    |          idle_cpu();
  321.913064 |   1)   2.696 us    |        }
  321.913065 |   1)               |        handle_irq() {
  321.913065 |   1)   0.533 us    |          irq_to_desc();
  321.913067 |   1)               |          handle_edge_irq() {
  321.913067 |   1)   0.653 us    |            _spin_lock();
  321.913068 |   1)               |            ack_apic_edge() {
  321.913069 |   1)   0.533 us    |              irq_to_desc();
  321.913070 |   1)               |              move_native_irq() {
  321.913070 |   1)   0.525 us    |                irq_to_desc();
  321.913072 |   1)   1.577 us    |              }
  321.913072 |   1)   0.563 us    |              native_apic_mem_write();
  321.913073 |   1)   4.813 us    |            }
  321.913074 |   1)   0.601 us    |            _spin_unlock();
  321.913075 |   1)               |            handle_IRQ_event() {
  321.913075 |   1)               |              timer_interrupt() {
  321.913076 |   1)               |                tick_handle_oneshot_broadcast() {
  321.913077 |   1)   0.631 us    |                  _spin_lock();
  321.913078 |   1)               |                  ktime_get() {
  321.913078 |   1)               |                    ktime_get_ts() {
  321.913079 |   1)               |                      getnstimeofday() {
  321.913079 |   1)   1.735 us    |                        read_hpet();
  321.913082 |   1)   2.854 us    |                      }
  321.913082 |   1)   0.534 us    |                      set_normalized_timespec();
  321.913083 |   1)   4.963 us    |                    }
  321.913084 |   1)   6.015 us    |                  }
  321.913084 |   1)               |                  tick_do_broadcast() {
  321.913085 |   1)               |                    lapic_timer_broadcast() {
  321.913086 |   1)   0.624 us    |                      flat_send_IPI_mask();
  321.913087 |   1)   1.697 us    |                    }
  321.913087 |   1)   2.944 us    |                  }
  321.913088 |   1)   0.585 us    |                  _spin_unlock();
  321.913089 |   1) + 12.892 us   |                }
  321.913089 |   1) + 14.079 us   |              }
  321.913090 |   1) + 15.176 us   |            }
  321.913091 |   1)   0.540 us    |            note_interrupt();
  321.913092 |   1)   0.653 us    |            _spin_lock();
  321.913093 |   1)   0.586 us    |            _spin_unlock();
  321.913094 |   1) + 27.354 us   |          }
  321.913094 |   1) + 29.547 us   |        }
  321.913095 |   1)               |        irq_exit() {
  321.913096 |   1)   0.526 us    |          rcu_irq_exit();
  321.913097 |   1)   0.526 us    |          idle_cpu();
  321.913098 |   1)   2.674 us    |        }
  321.913098 |   1) + 41.096 us   |      }
  321.913098 |   1)   <========== |

Or you can only trace do_IRQ and smp_apic_timer_interrupt alone without tracing their
call tree:

echo smp_apic_timer_interrupt > set_ftrace_filter
echo do_IRQ >> set_ftrace_filter


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 21:46     ` Peter Zijlstra
  2009-02-18 22:02       ` Frank Ch. Eigler
  2009-02-18 23:34       ` Kyle McMartin
@ 2009-02-19  2:13       ` Frederic Weisbecker
  2 siblings, 0 replies; 48+ messages in thread
From: Frederic Weisbecker @ 2009-02-19  2:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fche, compudj

On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 16:35 -0500, Jason Baron wrote:
> > On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote:
> > > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote:
> > > > hi,
> > > > 
> > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has
> > > > the following column format:
> > > > 
> > > > [time][irq][action][handled][cpu][length]
> > > > 
> > > > for example on my system:
> > > > 
> > > > [ 1038.927248381]     28     eth0      1        3     0.000002745
> > > > [ 1038.927544688]     28     eth0      1        3     0.000002520
> > > > [ 1038.927593609]     28     eth0      1        3     0.000002509
> > > > [ 1038.974087675]     29     ahci      1        1     0.000013135
> > > > [ 1038.974288475]     29     ahci      1        1     0.000005542
> > > > [ 1038.974414324]     29     ahci      1        1     0.000007953
> > > > 
> > > > I think this patch is useful in understanding irq system behavior, and
> > > > for tracking down irq driver handlers that are firing too often or
> > > > spending too much time handling an irq. This approach could be extended
> > > > to the timer irq and for softirqs...
> > > 
> > > The function graph tracer can already do this, it has a special section
> > > to recognize irq entry function, and already measures time spend.
> > > 
> > > I guess with Steve's new triggers (probes or whatever they ended up
> > > being called) you could limit it to just IRQ entry points.
> > > 
> > 
> > i did look at the graph tracer first. While it does measure irq related
> > functions, it does not give this level of detail concerning which irq #,
> > which irq handler is involved, and whether or not the irq was handled
> > successfully or not. So I believe this tracepoints add a level of detail
> > that the graph tracer does not have. Furthermore, this patch requires 2
> > tracepoints, not instrumentation for all kernel functions.
> 
> How useful is that return value?
> 
> Much of the other data is already available, /proc/interrupts will
> happily tell you the source of your interrupt storm. The irq-off latency
> tracer will tell you if stuff takes too much time, the graph tracer can
> tell you what is taking how much time.
> 
> I really am having a difficult time seeing the use in such narrow
> tracers.
> 

Well, something that could be done would be to use this irq tracing framework
to produce the average of time spent in each irq handler.

kernel/trace/trace_stat.c gives some infrastructure for that.

BTW, I have the project to build a function hashlist, based on the function list
provided by dynamic ftrace, to store the average of time spent for each traced
function, using the function graph tracer.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-18 22:10         ` Peter Zijlstra
  2009-02-18 22:23           ` Frank Ch. Eigler
@ 2009-02-20 19:52           ` Jason Baron
  2009-02-21  3:39             ` Frederic Weisbecker
  2009-02-22  3:38             ` KOSAKI Motohiro
  1 sibling, 2 replies; 48+ messages in thread
From: Jason Baron @ 2009-02-20 19:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec, compudj

On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote:
> 
> > > I really am having a difficult time seeing the use in such narrow
> > > tracers.
> > 
> > Part of the problem may come from defining "tracers" as something
> > limited to ftrace engines.  Once such tracepoints are in the kernel,
> > more powerful analytical tools may be attached to them.
> 
> ftrace graph tracer is by far the most powerful thing I've seen, there's
> nothing limiting about ftrace.
> 
> What is limiting are these puny little tracers that have no real value.
> 
> A much better purpose for these tracepoints would be augmenting data in
> existing tracers like the graph/function/sched tracer.
> 

ok...using these two tracepoints i've augmented the graph tracer to add
a new column that prints the irq # and handler we're in, or -1
otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output:


 1) #-1:none        2.323 us    |                                  native_apic_mem_write();
 1) #-1:none      + 21.891 us   |                                }
 1) #-1:none                    |                                handle_IRQ_event() {
 1) #29:ahci        1.938 us    |                                  irq_to_desc();
 1) #29:ahci        2.626 us    |                                  runqueue_is_locked();
 1) #29:ahci      + 21.173 us   |                                          }
		.
		.
		.
 1) #29:ahci      + 32.047 us   |                                        }
 1) #29:ahci      + 43.746 us   |                                      }
 1) #29:ahci      + 51.634 us   |                                    }
 1) #29:ahci      + 61.954 us   |                                  }
 1) #-1:none        1.014 us    |                                  runqueue_is_locked();
 1) #-1:none                    |                                  __wake_up() {
 1) #-1:none        0.920 us    |                                    _spin_lock_irqsave();
 1) #-1:none        1.028 us    |                                    __wake_up_common();


This seems to work pretty well - although it does get confused by
certain preemption events which it can't handle...I think this can make
the logs more readable. I can probably get it working if ppl thinks its
valuable...or have better ways of implementing it. Code is below.

thanks,

-Jason


diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 0000000..ecec94a
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,25 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+enum {
+        IRQ_ENTRY = 0,
+        IRQ_EXIT = 1,
+};
+
+struct irq_trace {
+	int type;
+	int irq;
+	char irq_name[10];
+};
+
+DECLARE_TRACE(irq_entry,
+	TPPROTO(unsigned int id),
+	TPARGS(id));
+DECLARE_TRACE(irq_exit,
+	TPPROTO(unsigned int id, irqreturn_t retval),
+	TPARGS(id, retval));
+
+#endif /* _TRACE_IRQ_H */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fdff380..34053b5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,7 @@
 #include <linux/rculist.h>
 #include <linux/hash.h>
 #include <linux/bootmem.h>
+#include <trace/irq.h>
 
 #include "internals.h"
 
@@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+DEFINE_TRACE(irq_entry);
+DEFINE_TRACE(irq_exit);
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		local_irq_enable_in_hardirq();
 
 	do {
+		trace_irq_entry(irq);
 		ret = action->handler(irq, action->dev_id);
+		trace_irq_exit(irq, ret);
 		if (ret == IRQ_HANDLED)
 			status |= action->flags;
 		retval |= ret;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 627090b..58b9f7e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dbff020..6a34aaa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
 #include <trace/boot.h>
 #include <trace/kmemtrace.h>
 #include <trace/power.h>
+#include <trace/irq.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -33,6 +34,7 @@ enum trace_type {
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_POWER,
+	TRACE_IRQ,
 	TRACE_BLK,
 
 	__TRACE_LAST_TYPE,
@@ -173,6 +175,11 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+struct trace_irq {
+	struct trace_entry	ent;
+	struct irq_trace	irq_data;
+};
+
 struct kmemtrace_alloc_entry {
 	struct trace_entry	ent;
 	enum kmemtrace_type_id type_id;
@@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
  		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 519a0ca..4c4c777 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -10,10 +10,19 @@
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/fs.h>
+#include <trace/irq.h>
 
 #include "trace.h"
 #include "trace_output.h"
 
+
+struct per_cpu_irq_data {
+	int irq_num;
+	char action_str[10];
+};
+static struct per_cpu_irq_data cpu_irq_data[NR_CPUS];
+static struct trace_array *graph_trace_tr;
+
 #define TRACE_GRAPH_INDENT	2
 
 /* Flag options */
@@ -23,6 +32,7 @@
 #define TRACE_GRAPH_PRINT_PROC		0x8
 #define TRACE_GRAPH_PRINT_DURATION	0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME	0X20
+#define TRACE_GRAPH_PRINT_IRQ		0X40
 
 static struct tracer_opt trace_opts[] = {
 	/* Display overruns? (for self-debug purpose) */
@@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = {
 	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
 	/* Display absolute time of an entry */
 	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
+	/* Display irq number, -1 otherwise */
+	{ TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) },
 	{ } /* Empty entry */
 };
 
@@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = {
 
 static int graph_trace_init(struct trace_array *tr)
 {
+	int i;
 	int ret = register_ftrace_graph(&trace_graph_return,
 					&trace_graph_entry);
 	if (ret)
 		return ret;
+	graph_trace_tr = tr;
+	for (i=0;i<NR_CPUS;i++) {
+		cpu_irq_data[i].irq_num = -1;
+		sprintf(cpu_irq_data[i].action_str, "none");
+                cpu_irq_data[i].action_str[4] = '\0';
+	}
 	tracing_start_cmdline_record();
 
 	return 0;
@@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr)
 
 static void graph_trace_reset(struct trace_array *tr)
 {
+	graph_trace_tr = tr;
 	tracing_stop_cmdline_record();
 	unregister_ftrace_graph();
 }
@@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 	return TRACE_TYPE_HANDLED;
 }
 
+#define TRACE_GRAPH_IRQ_LENGTH     14
+
+static enum print_line_t
+print_graph_irqnum(struct trace_seq *s, int cpu)
+{
+	char irq_str[TRACE_GRAPH_IRQ_LENGTH];
+	int len, ret;
+
+	len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str);
+	ret = trace_seq_printf(s, "%s", irq_str);	
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+	while (len < TRACE_GRAPH_IRQ_LENGTH) {
+		ret = trace_seq_printf(s, " ");
+                if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		len++;
+	}
+	return TRACE_TYPE_HANDLED;
+}
+
 #define TRACE_GRAPH_PROCINFO_LENGTH	14
 
 static enum print_line_t
@@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
+	/* irq */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+		ret = print_graph_irqnum(s, cpu);
+		if (ret == TRACE_TYPE_PARTIAL_LINE)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
 	/* Proc */
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
 		ret = print_graph_proc(s, ent->pid);
@@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
+	/* irq */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+		ret = print_graph_irqnum(s, cpu);
+		if (ret == TRACE_TYPE_PARTIAL_LINE)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
 	/* Proc */
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
 		ret = print_graph_proc(s, ent->pid);
@@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
+	/* irq */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+		ret = print_graph_irqnum(s, cpu);
+		if (ret == TRACE_TYPE_PARTIAL_LINE)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
 	/* Proc */
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
 		ret = print_graph_proc(s, ent->pid);
@@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter)
 		trace_assign_type(field, entry);
 		return print_graph_comment(field, s, entry, iter);
 	}
+	case TRACE_IRQ: {
+		struct trace_irq *field;
+		int cpu = iter->cpu;
+
+        	trace_assign_type(field, entry);
+		if (field->irq_data.type == IRQ_ENTRY) {
+			cpu_irq_data[cpu].irq_num = field->irq_data.irq;	
+			strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9);
+			cpu_irq_data[cpu].action_str[9] = '\0';
+		} else {
+			cpu_irq_data[cpu].irq_num = -1;
+			sprintf(cpu_irq_data[cpu].action_str, "none");
+			cpu_irq_data[cpu].action_str[4] = '\0';
+		}
+		return TRACE_TYPE_HANDLED;
+	}
 	default:
 		return TRACE_TYPE_UNHANDLED;
 	}
@@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter)
 	percpu_free(iter->private);
 }
 
+static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
+{
+        struct ring_buffer_event *event;
+        struct trace_irq *entry;
+
+        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
+                                          sizeof(*entry), 0, 0);
+        if (!event)
+                return;
+        entry   = ring_buffer_event_data(event);
+        entry->irq_data.type = IRQ_EXIT;
+        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
+}
+
+static void probe_irq_entry(unsigned int id)
+{
+        struct ring_buffer_event *event;
+        struct trace_irq *entry;
+	struct irq_desc *desc;
+
+        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
+                                          sizeof(*entry), 0, 0);
+        if (!event)
+                return;
+        entry   = ring_buffer_event_data(event);
+        entry->irq_data.irq = id;
+        entry->irq_data.type = IRQ_ENTRY;
+	desc = irq_to_desc(id);
+	strncpy(entry->irq_data.irq_name, desc->action->name, 9);
+	entry->irq_data.irq_name[9] = '\0';
+        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
+}
+
+static int graph_trace_set_flag(u32 old_flags, u32 bit, int set)
+{
+	int ret = 0;
+
+        if (bit == TRACE_GRAPH_PRINT_IRQ) {
+		/* do nothing if already set */
+		//if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ))
+                //        return 0;
+		if (set) {
+			ret = register_trace_irq_entry(probe_irq_entry);
+        		if (ret) {
+                		pr_info("irq trace: irq entry tracepoint failed to register\n");
+                		return ret;
+			}
+			ret = register_trace_irq_exit(probe_irq_exit);
+			if (ret) {
+				pr_info("irq trace: irq exit tracepoint failed to register\n");
+				unregister_trace_irq_entry(probe_irq_entry);
+				return ret;
+			}
+		} else {
+			unregister_trace_irq_entry(probe_irq_entry);	
+			unregister_trace_irq_exit(probe_irq_exit);
+		}
+	}
+	return ret;
+}
+
 static struct tracer graph_trace __read_mostly = {
 	.name	     	= "function_graph",
 	.open		= graph_trace_open,
@@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = {
 	.print_line	= print_graph_function,
 	.print_header	= print_graph_headers,
 	.flags		= &tracer_flags,
+	.set_flag       = graph_trace_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest	= trace_selftest_startup_function_graph,
 #endif


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-20 19:52           ` Jason Baron
@ 2009-02-21  3:39             ` Frederic Weisbecker
  2009-02-22  3:38             ` KOSAKI Motohiro
  1 sibling, 0 replies; 48+ messages in thread
From: Frederic Weisbecker @ 2009-02-21  3:39 UTC (permalink / raw)
  To: Jason Baron
  Cc: Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel,
	acme, compudj

On Fri, Feb 20, 2009 at 02:52:36PM -0500, Jason Baron wrote:
> On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> > On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote:
> > 
> > > > I really am having a difficult time seeing the use in such narrow
> > > > tracers.
> > > 
> > > Part of the problem may come from defining "tracers" as something
> > > limited to ftrace engines.  Once such tracepoints are in the kernel,
> > > more powerful analytical tools may be attached to them.
> > 
> > ftrace graph tracer is by far the most powerful thing I've seen, there's
> > nothing limiting about ftrace.
> > 
> > What is limiting are these puny little tracers that have no real value.
> > 
> > A much better purpose for these tracepoints would be augmenting data in
> > existing tracers like the graph/function/sched tracer.
> > 
> 
> ok...using these two tracepoints i've augmented the graph tracer to add
> a new column that prints the irq # and handler we're in, or -1
> otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output:
> 
> 
>  1) #-1:none        2.323 us    |                                  native_apic_mem_write();
>  1) #-1:none      + 21.891 us   |                                }
>  1) #-1:none                    |                                handle_IRQ_event() {


My dream would be to see:

1)                             |                                handle_IRQ_event(29:ahci) {


But it's not possible for now, not until we have the hashtable.
Anyway, instead of having #-1:none, it would be better to have nothing
in case of user context (or softirq).

Or actually why not just an ftrace_printk when the probe is on?
Since it immediately follows the call to handle_IRQ_event, we would have the
following displayed:

1)           |                                handle_IRQ_event() {
1)           |                                  /* 29 : ahci */

Because ftrace_printk send TRACE_PRINT entries which are displayed like this
by the function graph tracer.
It would be much more light than a whole column.


>  1) #29:ahci        1.938 us    |                                  irq_to_desc();
>  1) #29:ahci        2.626 us    |                                  runqueue_is_locked();
>  1) #29:ahci      + 21.173 us   |                                          }
> 		.
> 		.
> 		.
>  1) #29:ahci      + 32.047 us   |                                        }
>  1) #29:ahci      + 43.746 us   |                                      }
>  1) #29:ahci      + 51.634 us   |                                    }
>  1) #29:ahci      + 61.954 us   |                                  }
>  1) #-1:none        1.014 us    |                                  runqueue_is_locked();
>  1) #-1:none                    |                                  __wake_up() {
>  1) #-1:none        0.920 us    |                                    _spin_lock_irqsave();
>  1) #-1:none        1.028 us    |                                    __wake_up_common();
> 
> 
> This seems to work pretty well - although it does get confused by
> certain preemption events which it can't handle...I think this can make
> the logs more readable. I can probably get it working if ppl thinks its
> valuable...or have better ways of implementing it. Code is below.
> 
> thanks,
> 
> -Jason
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..ecec94a
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,25 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +enum {
> +        IRQ_ENTRY = 0,
> +        IRQ_EXIT = 1,
> +};
> +
> +struct irq_trace {
> +	int type;
> +	int irq;
> +	char irq_name[10];
> +};
> +
> +DECLARE_TRACE(irq_entry,
> +	TPPROTO(unsigned int id),
> +	TPARGS(id));
> +DECLARE_TRACE(irq_exit,
> +	TPPROTO(unsigned int id, irqreturn_t retval),
> +	TPARGS(id, retval));
> +
> +#endif /* _TRACE_IRQ_H */
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..34053b5 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_entry);
> +DEFINE_TRACE(irq_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_entry(irq);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_exit(irq, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 627090b..58b9f7e 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
>  obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
>  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
>  obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
> +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
>  
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index dbff020..6a34aaa 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -11,6 +11,7 @@
>  #include <trace/boot.h>
>  #include <trace/kmemtrace.h>
>  #include <trace/power.h>
> +#include <trace/irq.h>
>  
>  enum trace_type {
>  	__TRACE_FIRST_TYPE = 0,
> @@ -33,6 +34,7 @@ enum trace_type {
>  	TRACE_KMEM_ALLOC,
>  	TRACE_KMEM_FREE,
>  	TRACE_POWER,
> +	TRACE_IRQ,
>  	TRACE_BLK,
>  
>  	__TRACE_LAST_TYPE,
> @@ -173,6 +175,11 @@ struct trace_power {
>  	struct power_trace	state_data;
>  };
>  
> +struct trace_irq {
> +	struct trace_entry	ent;
> +	struct irq_trace	irq_data;
> +};
> +
>  struct kmemtrace_alloc_entry {
>  	struct trace_entry	ent;
>  	enum kmemtrace_type_id type_id;
> @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
>  			  TRACE_GRAPH_RET);		\
>  		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
>   		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
> +		IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
>  			  TRACE_KMEM_ALLOC);	\
>  		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 519a0ca..4c4c777 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -10,10 +10,19 @@
>  #include <linux/uaccess.h>
>  #include <linux/ftrace.h>
>  #include <linux/fs.h>
> +#include <trace/irq.h>
>  
>  #include "trace.h"
>  #include "trace_output.h"
>  
> +
> +struct per_cpu_irq_data {
> +	int irq_num;
> +	char action_str[10];
> +};
> +static struct per_cpu_irq_data cpu_irq_data[NR_CPUS];
> +static struct trace_array *graph_trace_tr;
> +
>  #define TRACE_GRAPH_INDENT	2
>  
>  /* Flag options */
> @@ -23,6 +32,7 @@
>  #define TRACE_GRAPH_PRINT_PROC		0x8
>  #define TRACE_GRAPH_PRINT_DURATION	0x10
>  #define TRACE_GRAPH_PRINT_ABS_TIME	0X20
> +#define TRACE_GRAPH_PRINT_IRQ		0X40
>  
>  static struct tracer_opt trace_opts[] = {
>  	/* Display overruns? (for self-debug purpose) */
> @@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = {
>  	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
>  	/* Display absolute time of an entry */
>  	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
> +	/* Display irq number, -1 otherwise */
> +	{ TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) },
>  	{ } /* Empty entry */
>  };
>  
> @@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = {
>  
>  static int graph_trace_init(struct trace_array *tr)
>  {
> +	int i;
>  	int ret = register_ftrace_graph(&trace_graph_return,
>  					&trace_graph_entry);
>  	if (ret)
>  		return ret;
> +	graph_trace_tr = tr;
> +	for (i=0;i<NR_CPUS;i++) {
> +		cpu_irq_data[i].irq_num = -1;
> +		sprintf(cpu_irq_data[i].action_str, "none");
> +                cpu_irq_data[i].action_str[4] = '\0';
> +	}
>  	tracing_start_cmdline_record();
>  
>  	return 0;
> @@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr)
>  
>  static void graph_trace_reset(struct trace_array *tr)
>  {
> +	graph_trace_tr = tr;
>  	tracing_stop_cmdline_record();
>  	unregister_ftrace_graph();
>  }
> @@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu)
>  	return TRACE_TYPE_HANDLED;
>  }
>  
> +#define TRACE_GRAPH_IRQ_LENGTH     14
> +
> +static enum print_line_t
> +print_graph_irqnum(struct trace_seq *s, int cpu)
> +{
> +	char irq_str[TRACE_GRAPH_IRQ_LENGTH];
> +	int len, ret;
> +
> +	len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str);
> +	ret = trace_seq_printf(s, "%s", irq_str);	
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +	while (len < TRACE_GRAPH_IRQ_LENGTH) {
> +		ret = trace_seq_printf(s, " ");
> +                if (!ret)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +		len++;
> +	}
> +	return TRACE_TYPE_HANDLED;
> +}
> +
>  #define TRACE_GRAPH_PROCINFO_LENGTH	14
>  
>  static enum print_line_t
> @@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
>  			return TRACE_TYPE_PARTIAL_LINE;
>  	}
>  
> +	/* irq */
> +	if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
> +		ret = print_graph_irqnum(s, cpu);
> +		if (ret == TRACE_TYPE_PARTIAL_LINE)
> +			return TRACE_TYPE_PARTIAL_LINE;
> +	}
> +
>  	/* Proc */
>  	if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
>  		ret = print_graph_proc(s, ent->pid);
> @@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter)
>  		trace_assign_type(field, entry);
>  		return print_graph_comment(field, s, entry, iter);
>  	}
> +	case TRACE_IRQ: {
> +		struct trace_irq *field;
> +		int cpu = iter->cpu;
> +
> +        	trace_assign_type(field, entry);
> +		if (field->irq_data.type == IRQ_ENTRY) {
> +			cpu_irq_data[cpu].irq_num = field->irq_data.irq;	
> +			strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9);
> +			cpu_irq_data[cpu].action_str[9] = '\0';
> +		} else {
> +			cpu_irq_data[cpu].irq_num = -1;
> +			sprintf(cpu_irq_data[cpu].action_str, "none");
> +			cpu_irq_data[cpu].action_str[4] = '\0';
> +		}
> +		return TRACE_TYPE_HANDLED;
> +	}
>  	default:
>  		return TRACE_TYPE_UNHANDLED;
>  	}
> @@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter)
>  	percpu_free(iter->private);
>  }
>  
> +static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_irq *entry;
> +
> +        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
> +                                          sizeof(*entry), 0, 0);
> +        if (!event)
> +                return;
> +        entry   = ring_buffer_event_data(event);
> +        entry->irq_data.type = IRQ_EXIT;
> +        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
> +}
> +
> +static void probe_irq_entry(unsigned int id)
> +{
> +        struct ring_buffer_event *event;
> +        struct trace_irq *entry;
> +	struct irq_desc *desc;
> +
> +        event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
> +                                          sizeof(*entry), 0, 0);
> +        if (!event)
> +                return;
> +        entry   = ring_buffer_event_data(event);
> +        entry->irq_data.irq = id;
> +        entry->irq_data.type = IRQ_ENTRY;
> +	desc = irq_to_desc(id);
> +	strncpy(entry->irq_data.irq_name, desc->action->name, 9);
> +	entry->irq_data.irq_name[9] = '\0';
> +        trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
> +}
> +
> +static int graph_trace_set_flag(u32 old_flags, u32 bit, int set)
> +{
> +	int ret = 0;
> +
> +        if (bit == TRACE_GRAPH_PRINT_IRQ) {
> +		/* do nothing if already set */
> +		//if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ))
> +                //        return 0;
> +		if (set) {
> +			ret = register_trace_irq_entry(probe_irq_entry);
> +        		if (ret) {
> +                		pr_info("irq trace: irq entry tracepoint failed to register\n");
> +                		return ret;
> +			}
> +			ret = register_trace_irq_exit(probe_irq_exit);
> +			if (ret) {
> +				pr_info("irq trace: irq exit tracepoint failed to register\n");
> +				unregister_trace_irq_entry(probe_irq_entry);
> +				return ret;
> +			}
> +		} else {
> +			unregister_trace_irq_entry(probe_irq_entry);	
> +			unregister_trace_irq_exit(probe_irq_exit);
> +		}
> +	}
> +	return ret;
> +}
> +
>  static struct tracer graph_trace __read_mostly = {
>  	.name	     	= "function_graph",
>  	.open		= graph_trace_open,
> @@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = {
>  	.print_line	= print_graph_function,
>  	.print_header	= print_graph_headers,
>  	.flags		= &tracer_flags,
> +	.set_flag       = graph_trace_set_flag,
>  #ifdef CONFIG_FTRACE_SELFTEST
>  	.selftest	= trace_selftest_startup_function_graph,
>  #endif
> 


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-20 19:52           ` Jason Baron
  2009-02-21  3:39             ` Frederic Weisbecker
@ 2009-02-22  3:38             ` KOSAKI Motohiro
  2009-02-25 16:48               ` Masami Hiramatsu
  1 sibling, 1 reply; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-22  3:38 UTC (permalink / raw)
  To: Jason Baron
  Cc: kosaki.motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo,
	rostedt, linux-kernel, acme, fweisbec, compudj,
	Mathieu Desnoyers

>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_entry(irq);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_exit(irq, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;

Nobdy want unnecessary redundant tracepoint.
Please discuss with mathieu, and merge his tracepoint.




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-22  3:38             ` KOSAKI Motohiro
@ 2009-02-25 16:48               ` Masami Hiramatsu
  2009-02-25 16:57                 ` Jason Baron
  2009-02-25 16:58                 ` Mathieu Desnoyers
  0 siblings, 2 replies; 48+ messages in thread
From: Masami Hiramatsu @ 2009-02-25 16:48 UTC (permalink / raw)
  To: KOSAKI Motohiro, Mathieu Desnoyers
  Cc: Jason Baron, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt,
	linux-kernel, acme, fweisbec, compudj



KOSAKI Motohiro wrote:
>>  /**
>>   * handle_IRQ_event - irq action chain handler
>>   * @irq:	the interrupt number
>> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>>  		local_irq_enable_in_hardirq();
>>  
>>  	do {
>> +		trace_irq_entry(irq);
>>  		ret = action->handler(irq, action->dev_id);
>> +		trace_irq_exit(irq, ret);
>>  		if (ret == IRQ_HANDLED)
>>  			status |= action->flags;
>>  		retval |= ret;
> 
> Nobdy want unnecessary redundant tracepoint.
> Please discuss with mathieu, and merge his tracepoint.

Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
is enough. However, from the same viewpoint, it should pass irq-number
to irq-exit event too, because we may lost some previous events by buffer-overflow
etc.

         trace_irq_entry(irq, NULL);
         ret = _handle_IRQ_event(irq, action);
         trace_irq_exit(irq, ret);
                        ^^^^

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 16:48               ` Masami Hiramatsu
@ 2009-02-25 16:57                 ` Jason Baron
  2009-02-25 17:34                   ` Mathieu Desnoyers
  2009-02-25 16:58                 ` Mathieu Desnoyers
  1 sibling, 1 reply; 48+ messages in thread
From: Jason Baron @ 2009-02-25 16:57 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: KOSAKI Motohiro, Mathieu Desnoyers, Peter Zijlstra,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec,
	compudj

On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> KOSAKI Motohiro wrote:
> >>  /**
> >>   * handle_IRQ_event - irq action chain handler
> >>   * @irq:	the interrupt number
> >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> >>  		local_irq_enable_in_hardirq();
> >>  
> >>  	do {
> >> +		trace_irq_entry(irq);
> >>  		ret = action->handler(irq, action->dev_id);
> >> +		trace_irq_exit(irq, ret);
> >>  		if (ret == IRQ_HANDLED)
> >>  			status |= action->flags;
> >>  		retval |= ret;
> > 
> > Nobdy want unnecessary redundant tracepoint.
> > Please discuss with mathieu, and merge his tracepoint.
> 
> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> is enough. However, from the same viewpoint, it should pass irq-number
> to irq-exit event too, because we may lost some previous events by buffer-overflow
> etc.
> 
>          trace_irq_entry(irq, NULL);
>          ret = _handle_IRQ_event(irq, action);
>          trace_irq_exit(irq, ret);
>                         ^^^^
> 

the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
different places. So the above suggested irq tracepoint provides the
same information with 4 less tracepoints in the code. So I believe its
simpler - plus we can understand which action handlers are handling the
interrupt.

thanks,

-Jason

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 16:48               ` Masami Hiramatsu
  2009-02-25 16:57                 ` Jason Baron
@ 2009-02-25 16:58                 ` Mathieu Desnoyers
  2009-02-25 17:19                   ` Masami Hiramatsu
  1 sibling, 1 reply; 48+ messages in thread
From: Mathieu Desnoyers @ 2009-02-25 16:58 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: KOSAKI Motohiro, Jason Baron, Peter Zijlstra, Frank Ch. Eigler,
	mingo, rostedt, linux-kernel, acme, fweisbec

* Masami Hiramatsu (mhiramat@redhat.com) wrote:
> 
> 
> KOSAKI Motohiro wrote:
> >>  /**
> >>   * handle_IRQ_event - irq action chain handler
> >>   * @irq:	the interrupt number
> >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> >>  		local_irq_enable_in_hardirq();
> >>  
> >>  	do {
> >> +		trace_irq_entry(irq);
> >>  		ret = action->handler(irq, action->dev_id);
> >> +		trace_irq_exit(irq, ret);
> >>  		if (ret == IRQ_HANDLED)
> >>  			status |= action->flags;
> >>  		retval |= ret;
> > 
> > Nobdy want unnecessary redundant tracepoint.
> > Please discuss with mathieu, and merge his tracepoint.
> 
> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> is enough. However, from the same viewpoint, it should pass irq-number
> to irq-exit event too, because we may lost some previous events by buffer-overflow
> etc.
> 
>          trace_irq_entry(irq, NULL);
>          ret = _handle_IRQ_event(irq, action);
>          trace_irq_exit(irq, ret);
>                         ^^^^
> 

I seriously doubt we should consider a trace with missing events as
"reliable". If your only argument is that when the buffers are not large
enough we could lose events, then I think we should just hint people at
doing the right thing, which is to tweak the tracer parameters (e.g.
larger buffers) so they stop losing events.

A trace with events lost is really a scenario close to a corrupted
trace because we don't know which event has been lost, nor where. I
don't think we should increase the event size to support that kind of
broken scenario.

Mathieu

> Thank you,
> 
> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America) Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 16:58                 ` Mathieu Desnoyers
@ 2009-02-25 17:19                   ` Masami Hiramatsu
  2009-02-27  3:08                     ` KOSAKI Motohiro
  0 siblings, 1 reply; 48+ messages in thread
From: Masami Hiramatsu @ 2009-02-25 17:19 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: KOSAKI Motohiro, Jason Baron, Peter Zijlstra, Frank Ch. Eigler,
	mingo, rostedt, linux-kernel, acme, fweisbec

Mathieu Desnoyers wrote:
> * Masami Hiramatsu (mhiramat@redhat.com) wrote:
>>
>> KOSAKI Motohiro wrote:
>>>>  /**
>>>>   * handle_IRQ_event - irq action chain handler
>>>>   * @irq:	the interrupt number
>>>> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>>>>  		local_irq_enable_in_hardirq();
>>>>  
>>>>  	do {
>>>> +		trace_irq_entry(irq);
>>>>  		ret = action->handler(irq, action->dev_id);
>>>> +		trace_irq_exit(irq, ret);
>>>>  		if (ret == IRQ_HANDLED)
>>>>  			status |= action->flags;
>>>>  		retval |= ret;
>>> Nobdy want unnecessary redundant tracepoint.
>>> Please discuss with mathieu, and merge his tracepoint.
>> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
>> is enough. However, from the same viewpoint, it should pass irq-number
>> to irq-exit event too, because we may lost some previous events by buffer-overflow
>> etc.
>>
>>          trace_irq_entry(irq, NULL);
>>          ret = _handle_IRQ_event(irq, action);
>>          trace_irq_exit(irq, ret);
>>                         ^^^^
>>
> 
> I seriously doubt we should consider a trace with missing events as
> "reliable". If your only argument is that when the buffers are not large
> enough we could lose events, then I think we should just hint people at
> doing the right thing, which is to tweak the tracer parameters (e.g.
> larger buffers) so they stop losing events.

Hi Mathieu,

I think it depends on what events we'd like to trace on what environment
(not have so much memories).
Someone(like me) may want "reliability" for those events, someone not.

> A trace with events lost is really a scenario close to a corrupted
> trace because we don't know which event has been lost, nor where. I
> don't think we should increase the event size to support that kind of
> broken scenario.

Why LTTng doesn't filter those unused arguments inside their handlers?
Is there no room for compromise on sharing tracepoints with other
tracers? IMHO, tracepoints in the kernel should be a common
infrastructure for many users.

Thank you,

> 
> Mathieu
> 
>> Thank you,
>>
>> -- 
>> Masami Hiramatsu
>>
>> Software Engineer
>> Hitachi Computer Products (America) Inc.
>> Software Solutions Division
>>
>> e-mail: mhiramat@redhat.com
>>
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 16:57                 ` Jason Baron
@ 2009-02-25 17:34                   ` Mathieu Desnoyers
  2009-02-25 18:05                     ` Steven Rostedt
                                       ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: Mathieu Desnoyers @ 2009-02-25 17:34 UTC (permalink / raw)
  To: Jason Baron
  Cc: Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec

* Jason Baron (jbaron@redhat.com) wrote:
> On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > KOSAKI Motohiro wrote:
> > >>  /**
> > >>   * handle_IRQ_event - irq action chain handler
> > >>   * @irq:	the interrupt number
> > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > >>  		local_irq_enable_in_hardirq();
> > >>  
> > >>  	do {
> > >> +		trace_irq_entry(irq);
> > >>  		ret = action->handler(irq, action->dev_id);
> > >> +		trace_irq_exit(irq, ret);
> > >>  		if (ret == IRQ_HANDLED)
> > >>  			status |= action->flags;
> > >>  		retval |= ret;
> > > 
> > > Nobdy want unnecessary redundant tracepoint.
> > > Please discuss with mathieu, and merge his tracepoint.
> > 
> > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > is enough. However, from the same viewpoint, it should pass irq-number
> > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > etc.
> > 
> >          trace_irq_entry(irq, NULL);
> >          ret = _handle_IRQ_event(irq, action);
> >          trace_irq_exit(irq, ret);
> >                         ^^^^
> > 
> 
> the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> different places. So the above suggested irq tracepoint provides the
> same information with 4 less tracepoints in the code. So I believe its
> simpler - plus we can understand which action handlers are handling the
> interrupt.
> 

The main thing I dislike about only tracing action->handler() calls is
that you are not tracing an IRQ per se, but rather the invocation of a
given handler within the interrupt. For instance, it would be difficult
to calculate the maximum interrupt latency for a given interrupt line,
because you don't have the "real" irq entry/exit events, just the
individual handler() calls.

But I agree that knowing which handler is called is important.

How about this compromise :

trace_irq_entry(irq, action)
  _handle_IRQ_event()
    for each action  {
      trace_irq_handler(action, ret);
      ret = action->handler(irq, action->dev_id);
      ...
    }
trace_irq_exit(action_ret);

Would that give you the information you need ?

Here trace_irq_handler would be passed the _current_ action invoked and
the _previous_ action return value. Note that we should initialize
irqreturn_t ret to some initial value if we do this. That should keep
the tracing overhead minimal.

Mathieu

> thanks,
> 
> -Jason
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 17:34                   ` Mathieu Desnoyers
@ 2009-02-25 18:05                     ` Steven Rostedt
  2009-02-25 22:12                       ` Mathieu Desnoyers
  2009-02-26 15:11                     ` Jason Baron
  2009-02-27  3:33                     ` KOSAKI Motohiro
  2 siblings, 1 reply; 48+ messages in thread
From: Steven Rostedt @ 2009-02-25 18:05 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra,
	Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec



On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:

> * Jason Baron (jbaron@redhat.com) wrote:
> > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > KOSAKI Motohiro wrote:
> > > >>  /**
> > > >>   * handle_IRQ_event - irq action chain handler
> > > >>   * @irq:	the interrupt number
> > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > >>  		local_irq_enable_in_hardirq();
> > > >>  
> > > >>  	do {
> > > >> +		trace_irq_entry(irq);
> > > >>  		ret = action->handler(irq, action->dev_id);
> > > >> +		trace_irq_exit(irq, ret);
> > > >>  		if (ret == IRQ_HANDLED)
> > > >>  			status |= action->flags;
> > > >>  		retval |= ret;
> > > > 
> > > > Nobdy want unnecessary redundant tracepoint.
> > > > Please discuss with mathieu, and merge his tracepoint.
> > > 
> > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > is enough. However, from the same viewpoint, it should pass irq-number
> > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > etc.
> > > 
> > >          trace_irq_entry(irq, NULL);
> > >          ret = _handle_IRQ_event(irq, action);
> > >          trace_irq_exit(irq, ret);
> > >                         ^^^^
> > > 
> > 
> > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > different places. So the above suggested irq tracepoint provides the
> > same information with 4 less tracepoints in the code. So I believe its
> > simpler - plus we can understand which action handlers are handling the
> > interrupt.
> > 
> 
> The main thing I dislike about only tracing action->handler() calls is
> that you are not tracing an IRQ per se, but rather the invocation of a
> given handler within the interrupt. For instance, it would be difficult
> to calculate the maximum interrupt latency for a given interrupt line,
> because you don't have the "real" irq entry/exit events, just the
> individual handler() calls.

Then use the function_graph tracer.

 # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter 
 # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter
 # echo function_graph > /debug/tracing/current_tracer 
 # cat /debug/tracing/trace

# tracer: function_graph
#
# CPU  DURATION                  FUNCTION CALLS
# |     |   |                     |   |   |   |
 ------------------------------------------
 1)  kblockd-4012  =>    <idle>-0   
 ------------------------------------------

 1)   ==========> |
 1) + 35.783 us   |  smp_apic_timer_interrupt();
 ------------------------------------------
 2)  kstop/2-4013  =>    <idle>-0   
 ------------------------------------------

 2)   ==========> |
 2) + 13.819 us   |  smp_apic_timer_interrupt();
 ------------------------------------------
 3)  kstop/3-4014  =>    <idle>-0   
 ------------------------------------------

 3)   ==========> |
 3) + 24.919 us   |  __irqentry_text_start();
 ------------------------------------------
 0)  kstop/0-4011  =>    <idle>-0   
 ------------------------------------------

 0)   ==========> |
 0) + 24.444 us   |  smp_apic_timer_interrupt();
 ------------------------------------------
 1)    <idle>-0    =>   bash-3899   
 ------------------------------------------

 1)   ==========> |
 1) + 13.642 us   |  smp_apic_timer_interrupt();
 2)   ==========> |
 2) + 15.014 us   |  smp_apic_timer_interrupt();
 3)   ==========> |
 3) + 31.004 us   |  smp_apic_timer_interrupt();
 ------------------------------------------
 0)    <idle>-0    =>   sshd-3892   
 ------------------------------------------

 0)   ==========> |
 0) + 15.655 us   |  __irqentry_text_start();
 0)   ==========> |
 0) + 37.722 us   |  __irqentry_text_start();
 ------------------------------------------
 0)   sshd-3892    =>    <idle>-0   
 ------------------------------------------

 0)   ==========> |
 0) + 26.139 us   |  __irqentry_text_start();
 3)   ==========> |
 3) + 27.240 us   |  __irqentry_text_start();
 3)   ==========> |
 3) + 21.987 us   |  __irqentry_text_start();


And there you have your latencies ;-)

With Jasons added trace points, we could add the to the event tracer
and those would show up as comments here.

-- Steve



> 
> But I agree that knowing which handler is called is important.
> 
> How about this compromise :
> 
> trace_irq_entry(irq, action)
>   _handle_IRQ_event()
>     for each action  {
>       trace_irq_handler(action, ret);
>       ret = action->handler(irq, action->dev_id);
>       ...
>     }
> trace_irq_exit(action_ret);
> 
> Would that give you the information you need ?
> 
> Here trace_irq_handler would be passed the _current_ action invoked and
> the _previous_ action return value. Note that we should initialize
> irqreturn_t ret to some initial value if we do this. That should keep
> the tracing overhead minimal.
> 


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 18:05                     ` Steven Rostedt
@ 2009-02-25 22:12                       ` Mathieu Desnoyers
  2009-02-25 22:20                         ` Frederic Weisbecker
  2009-02-25 22:21                         ` Steven Rostedt
  0 siblings, 2 replies; 48+ messages in thread
From: Mathieu Desnoyers @ 2009-02-25 22:12 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra,
	Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec

* Steven Rostedt (rostedt@goodmis.org) wrote:
> 
> 
> On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> 
> > * Jason Baron (jbaron@redhat.com) wrote:
> > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > > KOSAKI Motohiro wrote:
> > > > >>  /**
> > > > >>   * handle_IRQ_event - irq action chain handler
> > > > >>   * @irq:	the interrupt number
> > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > > >>  		local_irq_enable_in_hardirq();
> > > > >>  
> > > > >>  	do {
> > > > >> +		trace_irq_entry(irq);
> > > > >>  		ret = action->handler(irq, action->dev_id);
> > > > >> +		trace_irq_exit(irq, ret);
> > > > >>  		if (ret == IRQ_HANDLED)
> > > > >>  			status |= action->flags;
> > > > >>  		retval |= ret;
> > > > > 
> > > > > Nobdy want unnecessary redundant tracepoint.
> > > > > Please discuss with mathieu, and merge his tracepoint.
> > > > 
> > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > > is enough. However, from the same viewpoint, it should pass irq-number
> > > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > > etc.
> > > > 
> > > >          trace_irq_entry(irq, NULL);
> > > >          ret = _handle_IRQ_event(irq, action);
> > > >          trace_irq_exit(irq, ret);
> > > >                         ^^^^
> > > > 
> > > 
> > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > > different places. So the above suggested irq tracepoint provides the
> > > same information with 4 less tracepoints in the code. So I believe its
> > > simpler - plus we can understand which action handlers are handling the
> > > interrupt.
> > > 
> > 
> > The main thing I dislike about only tracing action->handler() calls is
> > that you are not tracing an IRQ per se, but rather the invocation of a
> > given handler within the interrupt. For instance, it would be difficult
> > to calculate the maximum interrupt latency for a given interrupt line,
> > because you don't have the "real" irq entry/exit events, just the
> > individual handler() calls.
> 
> Then use the function_graph tracer.
> 

Sadly, the function tracer cannot be enabled on production systems.
Therefore it's not a usable solution in the context where I need this.

Mathieu

>  # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter 
>  # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter
>  # echo function_graph > /debug/tracing/current_tracer 
>  # cat /debug/tracing/trace
> 
> # tracer: function_graph
> #
> # CPU  DURATION                  FUNCTION CALLS
> # |     |   |                     |   |   |   |
>  ------------------------------------------
>  1)  kblockd-4012  =>    <idle>-0   
>  ------------------------------------------
> 
>  1)   ==========> |
>  1) + 35.783 us   |  smp_apic_timer_interrupt();
>  ------------------------------------------
>  2)  kstop/2-4013  =>    <idle>-0   
>  ------------------------------------------
> 
>  2)   ==========> |
>  2) + 13.819 us   |  smp_apic_timer_interrupt();
>  ------------------------------------------
>  3)  kstop/3-4014  =>    <idle>-0   
>  ------------------------------------------
> 
>  3)   ==========> |
>  3) + 24.919 us   |  __irqentry_text_start();
>  ------------------------------------------
>  0)  kstop/0-4011  =>    <idle>-0   
>  ------------------------------------------
> 
>  0)   ==========> |
>  0) + 24.444 us   |  smp_apic_timer_interrupt();
>  ------------------------------------------
>  1)    <idle>-0    =>   bash-3899   
>  ------------------------------------------
> 
>  1)   ==========> |
>  1) + 13.642 us   |  smp_apic_timer_interrupt();
>  2)   ==========> |
>  2) + 15.014 us   |  smp_apic_timer_interrupt();
>  3)   ==========> |
>  3) + 31.004 us   |  smp_apic_timer_interrupt();
>  ------------------------------------------
>  0)    <idle>-0    =>   sshd-3892   
>  ------------------------------------------
> 
>  0)   ==========> |
>  0) + 15.655 us   |  __irqentry_text_start();
>  0)   ==========> |
>  0) + 37.722 us   |  __irqentry_text_start();
>  ------------------------------------------
>  0)   sshd-3892    =>    <idle>-0   
>  ------------------------------------------
> 
>  0)   ==========> |
>  0) + 26.139 us   |  __irqentry_text_start();
>  3)   ==========> |
>  3) + 27.240 us   |  __irqentry_text_start();
>  3)   ==========> |
>  3) + 21.987 us   |  __irqentry_text_start();
> 
> 
> And there you have your latencies ;-)
> 
> With Jasons added trace points, we could add the to the event tracer
> and those would show up as comments here.
> 
> -- Steve
> 
> 
> 
> > 
> > But I agree that knowing which handler is called is important.
> > 
> > How about this compromise :
> > 
> > trace_irq_entry(irq, action)
> >   _handle_IRQ_event()
> >     for each action  {
> >       trace_irq_handler(action, ret);
> >       ret = action->handler(irq, action->dev_id);
> >       ...
> >     }
> > trace_irq_exit(action_ret);
> > 
> > Would that give you the information you need ?
> > 
> > Here trace_irq_handler would be passed the _current_ action invoked and
> > the _previous_ action return value. Note that we should initialize
> > irqreturn_t ret to some initial value if we do this. That should keep
> > the tracing overhead minimal.
> > 
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 22:12                       ` Mathieu Desnoyers
@ 2009-02-25 22:20                         ` Frederic Weisbecker
  2009-02-25 23:13                           ` Mathieu Desnoyers
  2009-02-25 22:21                         ` Steven Rostedt
  1 sibling, 1 reply; 48+ messages in thread
From: Frederic Weisbecker @ 2009-02-25 22:20 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Steven Rostedt, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro,
	Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme

On Wed, Feb 25, 2009 at 05:12:10PM -0500, Mathieu Desnoyers wrote:
> * Steven Rostedt (rostedt@goodmis.org) wrote:
> > 
> > 
> > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> > 
> > > * Jason Baron (jbaron@redhat.com) wrote:
> > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > > > KOSAKI Motohiro wrote:
> > > > > >>  /**
> > > > > >>   * handle_IRQ_event - irq action chain handler
> > > > > >>   * @irq:	the interrupt number
> > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > > > >>  		local_irq_enable_in_hardirq();
> > > > > >>  
> > > > > >>  	do {
> > > > > >> +		trace_irq_entry(irq);
> > > > > >>  		ret = action->handler(irq, action->dev_id);
> > > > > >> +		trace_irq_exit(irq, ret);
> > > > > >>  		if (ret == IRQ_HANDLED)
> > > > > >>  			status |= action->flags;
> > > > > >>  		retval |= ret;
> > > > > > 
> > > > > > Nobdy want unnecessary redundant tracepoint.
> > > > > > Please discuss with mathieu, and merge his tracepoint.
> > > > > 
> > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > > > is enough. However, from the same viewpoint, it should pass irq-number
> > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > > > etc.
> > > > > 
> > > > >          trace_irq_entry(irq, NULL);
> > > > >          ret = _handle_IRQ_event(irq, action);
> > > > >          trace_irq_exit(irq, ret);
> > > > >                         ^^^^
> > > > > 
> > > > 
> > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > > > different places. So the above suggested irq tracepoint provides the
> > > > same information with 4 less tracepoints in the code. So I believe its
> > > > simpler - plus we can understand which action handlers are handling the
> > > > interrupt.
> > > > 
> > > 
> > > The main thing I dislike about only tracing action->handler() calls is
> > > that you are not tracing an IRQ per se, but rather the invocation of a
> > > given handler within the interrupt. For instance, it would be difficult
> > > to calculate the maximum interrupt latency for a given interrupt line,
> > > because you don't have the "real" irq entry/exit events, just the
> > > individual handler() calls.
> > 
> > Then use the function_graph tracer.
> > 
> 
> Sadly, the function tracer cannot be enabled on production systems.
> Therefore it's not a usable solution in the context where I need this.
> 
> Mathieu


Why?
If this is about the interrupt latency caused by stop_machine, I still don't understand.
This latency happens before the tracing (function filter which use patching), not during
tracing.

 
> >  # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter 
> >  # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter
> >  # echo function_graph > /debug/tracing/current_tracer 
> >  # cat /debug/tracing/trace
> > 
> > # tracer: function_graph
> > #
> > # CPU  DURATION                  FUNCTION CALLS
> > # |     |   |                     |   |   |   |
> >  ------------------------------------------
> >  1)  kblockd-4012  =>    <idle>-0   
> >  ------------------------------------------
> > 
> >  1)   ==========> |
> >  1) + 35.783 us   |  smp_apic_timer_interrupt();
> >  ------------------------------------------
> >  2)  kstop/2-4013  =>    <idle>-0   
> >  ------------------------------------------
> > 
> >  2)   ==========> |
> >  2) + 13.819 us   |  smp_apic_timer_interrupt();
> >  ------------------------------------------
> >  3)  kstop/3-4014  =>    <idle>-0   
> >  ------------------------------------------
> > 
> >  3)   ==========> |
> >  3) + 24.919 us   |  __irqentry_text_start();
> >  ------------------------------------------
> >  0)  kstop/0-4011  =>    <idle>-0   
> >  ------------------------------------------
> > 
> >  0)   ==========> |
> >  0) + 24.444 us   |  smp_apic_timer_interrupt();
> >  ------------------------------------------
> >  1)    <idle>-0    =>   bash-3899   
> >  ------------------------------------------
> > 
> >  1)   ==========> |
> >  1) + 13.642 us   |  smp_apic_timer_interrupt();
> >  2)   ==========> |
> >  2) + 15.014 us   |  smp_apic_timer_interrupt();
> >  3)   ==========> |
> >  3) + 31.004 us   |  smp_apic_timer_interrupt();
> >  ------------------------------------------
> >  0)    <idle>-0    =>   sshd-3892   
> >  ------------------------------------------
> > 
> >  0)   ==========> |
> >  0) + 15.655 us   |  __irqentry_text_start();
> >  0)   ==========> |
> >  0) + 37.722 us   |  __irqentry_text_start();
> >  ------------------------------------------
> >  0)   sshd-3892    =>    <idle>-0   
> >  ------------------------------------------
> > 
> >  0)   ==========> |
> >  0) + 26.139 us   |  __irqentry_text_start();
> >  3)   ==========> |
> >  3) + 27.240 us   |  __irqentry_text_start();
> >  3)   ==========> |
> >  3) + 21.987 us   |  __irqentry_text_start();
> > 
> > 
> > And there you have your latencies ;-)
> > 
> > With Jasons added trace points, we could add the to the event tracer
> > and those would show up as comments here.
> > 
> > -- Steve
> > 
> > 
> > 
> > > 
> > > But I agree that knowing which handler is called is important.
> > > 
> > > How about this compromise :
> > > 
> > > trace_irq_entry(irq, action)
> > >   _handle_IRQ_event()
> > >     for each action  {
> > >       trace_irq_handler(action, ret);
> > >       ret = action->handler(irq, action->dev_id);
> > >       ...
> > >     }
> > > trace_irq_exit(action_ret);
> > > 
> > > Would that give you the information you need ?
> > > 
> > > Here trace_irq_handler would be passed the _current_ action invoked and
> > > the _previous_ action return value. Note that we should initialize
> > > irqreturn_t ret to some initial value if we do this. That should keep
> > > the tracing overhead minimal.
> > > 
> > 
> 
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 22:12                       ` Mathieu Desnoyers
  2009-02-25 22:20                         ` Frederic Weisbecker
@ 2009-02-25 22:21                         ` Steven Rostedt
  1 sibling, 0 replies; 48+ messages in thread
From: Steven Rostedt @ 2009-02-25 22:21 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra,
	Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec



On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> > > The main thing I dislike about only tracing action->handler() calls is
> > > that you are not tracing an IRQ per se, but rather the invocation of a
> > > given handler within the interrupt. For instance, it would be difficult
> > > to calculate the maximum interrupt latency for a given interrupt line,
> > > because you don't have the "real" irq entry/exit events, just the
> > > individual handler() calls.
> > 
> > Then use the function_graph tracer.
> > 
> 
> Sadly, the function tracer cannot be enabled on production systems.
> Therefore it's not a usable solution in the context where I need this.

And why not? It has zero overhead when disabled.

-- Steve

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 22:20                         ` Frederic Weisbecker
@ 2009-02-25 23:13                           ` Mathieu Desnoyers
  2009-02-26  1:41                             ` Steven Rostedt
  0 siblings, 1 reply; 48+ messages in thread
From: Mathieu Desnoyers @ 2009-02-25 23:13 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Steven Rostedt, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro,
	Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme,
	Dominique Toupin

* Frederic Weisbecker (fweisbec@gmail.com) wrote:
> On Wed, Feb 25, 2009 at 05:12:10PM -0500, Mathieu Desnoyers wrote:
> > * Steven Rostedt (rostedt@goodmis.org) wrote:
> > > 
> > > 
> > > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> > > 
> > > > * Jason Baron (jbaron@redhat.com) wrote:
> > > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > > > > KOSAKI Motohiro wrote:
> > > > > > >>  /**
> > > > > > >>   * handle_IRQ_event - irq action chain handler
> > > > > > >>   * @irq:	the interrupt number
> > > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > > > > >>  		local_irq_enable_in_hardirq();
> > > > > > >>  
> > > > > > >>  	do {
> > > > > > >> +		trace_irq_entry(irq);
> > > > > > >>  		ret = action->handler(irq, action->dev_id);
> > > > > > >> +		trace_irq_exit(irq, ret);
> > > > > > >>  		if (ret == IRQ_HANDLED)
> > > > > > >>  			status |= action->flags;
> > > > > > >>  		retval |= ret;
> > > > > > > 
> > > > > > > Nobdy want unnecessary redundant tracepoint.
> > > > > > > Please discuss with mathieu, and merge his tracepoint.
> > > > > > 
> > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > > > > is enough. However, from the same viewpoint, it should pass irq-number
> > > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > > > > etc.
> > > > > > 
> > > > > >          trace_irq_entry(irq, NULL);
> > > > > >          ret = _handle_IRQ_event(irq, action);
> > > > > >          trace_irq_exit(irq, ret);
> > > > > >                         ^^^^
> > > > > > 
> > > > > 
> > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > > > > different places. So the above suggested irq tracepoint provides the
> > > > > same information with 4 less tracepoints in the code. So I believe its
> > > > > simpler - plus we can understand which action handlers are handling the
> > > > > interrupt.
> > > > > 
> > > > 
> > > > The main thing I dislike about only tracing action->handler() calls is
> > > > that you are not tracing an IRQ per se, but rather the invocation of a
> > > > given handler within the interrupt. For instance, it would be difficult
> > > > to calculate the maximum interrupt latency for a given interrupt line,
> > > > because you don't have the "real" irq entry/exit events, just the
> > > > individual handler() calls.
> > > 
> > > Then use the function_graph tracer.
> > > 
> > 
> > Sadly, the function tracer cannot be enabled on production systems.
> > Therefore it's not a usable solution in the context where I need this.
> > 
> > Mathieu
> 
> 
> Why?
> If this is about the interrupt latency caused by stop_machine, I still don't understand.
> This latency happens before the tracing (function filter which use patching), not during
> tracing.
> 

Given this scenario :

A telecommunication system runs, but the client notices something wrong.
They call their service provider. The provider enables tracing
_remotely_ on the _production system_ while it's _active in the field_.

Bam, those few milliseconds interrupt latencies become unacceptable.

Hopefully this scenario makes the use-case clearer. The problem is not
that interrupt latencies would occur while tracing is on, but rather
that it would happen on a running production system when switching
tracing on. This is what is totally unacceptable for this use-case.

For more details about such requirements, I'm CCing Dominique Toupin
from Ericsson who I'm sure would be happy to give more details about
this if needed.

Mathieu




>  
> > >  # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter 
> > >  # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter
> > >  # echo function_graph > /debug/tracing/current_tracer 
> > >  # cat /debug/tracing/trace
> > > 
> > > # tracer: function_graph
> > > #
> > > # CPU  DURATION                  FUNCTION CALLS
> > > # |     |   |                     |   |   |   |
> > >  ------------------------------------------
> > >  1)  kblockd-4012  =>    <idle>-0   
> > >  ------------------------------------------
> > > 
> > >  1)   ==========> |
> > >  1) + 35.783 us   |  smp_apic_timer_interrupt();
> > >  ------------------------------------------
> > >  2)  kstop/2-4013  =>    <idle>-0   
> > >  ------------------------------------------
> > > 
> > >  2)   ==========> |
> > >  2) + 13.819 us   |  smp_apic_timer_interrupt();
> > >  ------------------------------------------
> > >  3)  kstop/3-4014  =>    <idle>-0   
> > >  ------------------------------------------
> > > 
> > >  3)   ==========> |
> > >  3) + 24.919 us   |  __irqentry_text_start();
> > >  ------------------------------------------
> > >  0)  kstop/0-4011  =>    <idle>-0   
> > >  ------------------------------------------
> > > 
> > >  0)   ==========> |
> > >  0) + 24.444 us   |  smp_apic_timer_interrupt();
> > >  ------------------------------------------
> > >  1)    <idle>-0    =>   bash-3899   
> > >  ------------------------------------------
> > > 
> > >  1)   ==========> |
> > >  1) + 13.642 us   |  smp_apic_timer_interrupt();
> > >  2)   ==========> |
> > >  2) + 15.014 us   |  smp_apic_timer_interrupt();
> > >  3)   ==========> |
> > >  3) + 31.004 us   |  smp_apic_timer_interrupt();
> > >  ------------------------------------------
> > >  0)    <idle>-0    =>   sshd-3892   
> > >  ------------------------------------------
> > > 
> > >  0)   ==========> |
> > >  0) + 15.655 us   |  __irqentry_text_start();
> > >  0)   ==========> |
> > >  0) + 37.722 us   |  __irqentry_text_start();
> > >  ------------------------------------------
> > >  0)   sshd-3892    =>    <idle>-0   
> > >  ------------------------------------------
> > > 
> > >  0)   ==========> |
> > >  0) + 26.139 us   |  __irqentry_text_start();
> > >  3)   ==========> |
> > >  3) + 27.240 us   |  __irqentry_text_start();
> > >  3)   ==========> |
> > >  3) + 21.987 us   |  __irqentry_text_start();
> > > 
> > > 
> > > And there you have your latencies ;-)
> > > 
> > > With Jasons added trace points, we could add the to the event tracer
> > > and those would show up as comments here.
> > > 
> > > -- Steve
> > > 
> > > 
> > > 
> > > > 
> > > > But I agree that knowing which handler is called is important.
> > > > 
> > > > How about this compromise :
> > > > 
> > > > trace_irq_entry(irq, action)
> > > >   _handle_IRQ_event()
> > > >     for each action  {
> > > >       trace_irq_handler(action, ret);
> > > >       ret = action->handler(irq, action->dev_id);
> > > >       ...
> > > >     }
> > > > trace_irq_exit(action_ret);
> > > > 
> > > > Would that give you the information you need ?
> > > > 
> > > > Here trace_irq_handler would be passed the _current_ action invoked and
> > > > the _previous_ action return value. Note that we should initialize
> > > > irqreturn_t ret to some initial value if we do this. That should keep
> > > > the tracing overhead minimal.
> > > > 
> > > 
> > 
> > -- 
> > Mathieu Desnoyers
> > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 23:13                           ` Mathieu Desnoyers
@ 2009-02-26  1:41                             ` Steven Rostedt
  2009-02-26 12:37                               ` Dominique Toupin
  0 siblings, 1 reply; 48+ messages in thread
From: Steven Rostedt @ 2009-02-26  1:41 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo,
	linux-kernel, acme, Dominique Toupin


On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> > > > 
> > > > Then use the function_graph tracer.
> > > > 
> > > 
> > > Sadly, the function tracer cannot be enabled on production systems.
> > > Therefore it's not a usable solution in the context where I need this.
> > > 
> > > Mathieu
> > 
> > 
> > Why?
> > If this is about the interrupt latency caused by stop_machine, I still don't understand.
> > This latency happens before the tracing (function filter which use patching), not during
> > tracing.
> > 
> 
> Given this scenario :
> 
> A telecommunication system runs, but the client notices something wrong.
> They call their service provider. The provider enables tracing
> _remotely_ on the _production system_ while it's _active in the field_.
> 
> Bam, those few milliseconds interrupt latencies become unacceptable.
> 
> Hopefully this scenario makes the use-case clearer. The problem is not
> that interrupt latencies would occur while tracing is on, but rather
> that it would happen on a running production system when switching
> tracing on. This is what is totally unacceptable for this use-case.
> 
> For more details about such requirements, I'm CCing Dominique Toupin
> from Ericsson who I'm sure would be happy to give more details about
> this if needed.

Hmm, so this system in the field is running Linux with the Real-Time 
Patch?  Because if it isn't it will suffer from millisecond latencies in 
normal operation.

-- Steve


^ permalink raw reply	[flat|nested] 48+ messages in thread

* RE: [PATCH] new irq tracer
  2009-02-26  1:41                             ` Steven Rostedt
@ 2009-02-26 12:37                               ` Dominique Toupin
  2009-02-27  3:14                                 ` KOSAKI Motohiro
  0 siblings, 1 reply; 48+ messages in thread
From: Dominique Toupin @ 2009-02-26 12:37 UTC (permalink / raw)
  To: Steven Rostedt, Mathieu Desnoyers
  Cc: Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo,
	linux-kernel, acme


In many cases we don't use Linux real-time, we have many systems that
are soft-real-time an non real-time Linux is good enough.
 

> -----Original Message-----
> From: Steven Rostedt [mailto:rostedt@goodmis.org] 
> Sent: 25-Feb-09 20:42
> To: Mathieu Desnoyers
> Cc: Frederic Weisbecker; Jason Baron; Masami Hiramatsu; 
> KOSAKI Motohiro; Peter Zijlstra; Frank Ch. Eigler; 
> mingo@elte.hu; linux-kernel@vger.kernel.org; 
> acme@ghostprotocols.net; Dominique Toupin
> Subject: Re: [PATCH] new irq tracer
> 
> 
> On Wed, 25 Feb 2009, Mathieu Desnoyers wrote:
> > > > > 
> > > > > Then use the function_graph tracer.
> > > > > 
> > > > 
> > > > Sadly, the function tracer cannot be enabled on 
> production systems.
> > > > Therefore it's not a usable solution in the context 
> where I need this.
> > > > 
> > > > Mathieu
> > > 
> > > 
> > > Why?
> > > If this is about the interrupt latency caused by 
> stop_machine, I still don't understand.
> > > This latency happens before the tracing (function filter 
> which use 
> > > patching), not during tracing.
> > > 
> > 
> > Given this scenario :
> > 
> > A telecommunication system runs, but the client notices 
> something wrong.
> > They call their service provider. The provider enables tracing 
> > _remotely_ on the _production system_ while it's _active in 
> the field_.
> > 
> > Bam, those few milliseconds interrupt latencies become unacceptable.
> > 
> > Hopefully this scenario makes the use-case clearer. The 
> problem is not 
> > that interrupt latencies would occur while tracing is on, 
> but rather 
> > that it would happen on a running production system when switching 
> > tracing on. This is what is totally unacceptable for this use-case.
> > 
> > For more details about such requirements, I'm CCing 
> Dominique Toupin 
> > from Ericsson who I'm sure would be happy to give more 
> details about 
> > this if needed.
> 
> Hmm, so this system in the field is running Linux with the 
> Real-Time Patch?  Because if it isn't it will suffer from 
> millisecond latencies in normal operation.
> 
> -- Steve
> 
> 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 17:34                   ` Mathieu Desnoyers
  2009-02-25 18:05                     ` Steven Rostedt
@ 2009-02-26 15:11                     ` Jason Baron
  2009-02-26 15:32                       ` Steven Rostedt
                                         ` (3 more replies)
  2009-02-27  3:33                     ` KOSAKI Motohiro
  2 siblings, 4 replies; 48+ messages in thread
From: Jason Baron @ 2009-02-26 15:11 UTC (permalink / raw)
  To: Mathieu Desnoyers, mingo
  Cc: Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec

On Wed, Feb 25, 2009 at 12:34:12PM -0500, Mathieu Desnoyers wrote:
> * Jason Baron (jbaron@redhat.com) wrote:
> > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > KOSAKI Motohiro wrote:
> > > >>  /**
> > > >>   * handle_IRQ_event - irq action chain handler
> > > >>   * @irq:	the interrupt number
> > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > >>  		local_irq_enable_in_hardirq();
> > > >>  
> > > >>  	do {
> > > >> +		trace_irq_entry(irq);
> > > >>  		ret = action->handler(irq, action->dev_id);
> > > >> +		trace_irq_exit(irq, ret);
> > > >>  		if (ret == IRQ_HANDLED)
> > > >>  			status |= action->flags;
> > > >>  		retval |= ret;
> > > > 
> > > > Nobdy want unnecessary redundant tracepoint.
> > > > Please discuss with mathieu, and merge his tracepoint.
> > > 
> > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > is enough. However, from the same viewpoint, it should pass irq-number
> > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > etc.
> > > 
> > >          trace_irq_entry(irq, NULL);
> > >          ret = _handle_IRQ_event(irq, action);
> > >          trace_irq_exit(irq, ret);
> > >                         ^^^^
> > > 
> > 
> > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > different places. So the above suggested irq tracepoint provides the
> > same information with 4 less tracepoints in the code. So I believe its
> > simpler - plus we can understand which action handlers are handling the
> > interrupt.
> > 
> 
> The main thing I dislike about only tracing action->handler() calls is
> that you are not tracing an IRQ per se, but rather the invocation of a
> given handler within the interrupt. For instance, it would be difficult
> to calculate the maximum interrupt latency for a given interrupt line,
> because you don't have the "real" irq entry/exit events, just the
> individual handler() calls.
> 
> But I agree that knowing which handler is called is important.
> 
> How about this compromise :
> 
> trace_irq_entry(irq, action)
>   _handle_IRQ_event()
>     for each action  {
>       trace_irq_handler(action, ret);
>       ret = action->handler(irq, action->dev_id);
>       ...
>     }
> trace_irq_exit(action_ret);
> 
> Would that give you the information you need ?
> 
> Here trace_irq_handler would be passed the _current_ action invoked and
> the _previous_ action return value. Note that we should initialize
> irqreturn_t ret to some initial value if we do this. That should keep
> the tracing overhead minimal.
> 

maybe...although that would require re-arranging the 'while' loop in
'handle_IRQ_event' from a do..while loop to a 'while' loop, which will
require an extra branch check, and then we still have to record the last 'ret'
value. I'm not that keen on re-arranging this for trace data...

Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
as follows using the original two tracepoints (patch below):

 3)               |                      handle_IRQ_event() {
 3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
 3)               |                        e1000_intr_msi() {
 3)   2.460 us    |                          __napi_schedule();
 3)   9.416 us    |                        }
 3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
 3) + 22.935 us   |                      }

thanks,

-Jason


Signed-off-by: Jason Baron <jbaron@redhat.com>
---

 include/trace/irq.h             |    9 +++++++++
 include/trace/irq_event_types.h |   17 +++++++++++++++++
 kernel/irq/handle.c             |    6 ++++++
 kernel/trace/events.c           |    2 ++
 4 files changed, 34 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/irq.h
 create mode 100644 include/trace/irq_event_types.h


diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 0000000..ff5d449
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,9 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+#include <trace/irq_event_types.h>
+
+#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
new file mode 100644
index 0000000..5889bfd
--- /dev/null
+++ b/include/trace/irq_event_types.h
@@ -0,0 +1,17 @@
+
+/* use <trace/irq.h> instead */
+#ifndef DEFINE_TRACE_FMT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+DEFINE_TRACE_FMT(irq_handler_entry,
+	TPPROTO(int irq, struct irqaction *action),
+	TPARGS(irq, action),
+	TPFMT("irq=%d handler=%s", irq, action->name));
+
+DEFINE_TRACE_FMT(irq_handler_exit,
+	TPPROTO(int irq, struct irqaction *action, int ret),
+	TPARGS(irq, action, ret),
+	TPFMT("irq=%d handler=%s return=%s",
+		irq, action->name, ret ? "handled" : "unhandled"));
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fdff380..bcebc2e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,7 @@
 #include <linux/rculist.h>
 #include <linux/hash.h>
 #include <linux/bootmem.h>
+#include <trace/irq.h>
 
 #include "internals.h"
 
@@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+DEFINE_TRACE(irq_handler_entry);
+DEFINE_TRACE(irq_handler_exit);
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		local_irq_enable_in_hardirq();
 
 	do {
+		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);
+		trace_irq_handler_exit(irq, action, ret);
 		if (ret == IRQ_HANDLED)
 			status |= action->flags;
 		retval |= ret;
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 38c89ee..3c75623 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -6,8 +6,10 @@
 
 /* trace/<type>.h here */
 #include <trace/sched.h>
+#include <trace/irq.h>
 
 #include "trace_events.h"
 
 /* trace/<type>_event_types.h here */
 #include <trace/sched_event_types.h>
+#include <trace/irq_event_types.h>







^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 15:11                     ` Jason Baron
@ 2009-02-26 15:32                       ` Steven Rostedt
  2009-02-26 15:35                         ` Ingo Molnar
  2009-02-26 15:40                       ` Peter Zijlstra
                                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 48+ messages in thread
From: Steven Rostedt @ 2009-02-26 15:32 UTC (permalink / raw)
  To: Jason Baron
  Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro,
	Peter Zijlstra, Frank Ch. Eigler, linux-kernel, acme, fweisbec



On Thu, 26 Feb 2009, Jason Baron wrote:
> 
> maybe...although that would require re-arranging the 'while' loop in
> 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will
> require an extra branch check, and then we still have to record the last 'ret'
> value. I'm not that keen on re-arranging this for trace data...
> 
> Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
> as follows using the original two tracepoints (patch below):
> 
>  3)               |                      handle_IRQ_event() {
>  3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
>  3)               |                        e1000_intr_msi() {
>  3)   2.460 us    |                          __napi_schedule();
>  3)   9.416 us    |                        }
>  3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
>  3) + 22.935 us   |                      }
> 
> thanks,
> 
> -Jason

Looks good, one minor note:

> 
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> ---
> 
>  include/trace/irq.h             |    9 +++++++++
>  include/trace/irq_event_types.h |   17 +++++++++++++++++
>  kernel/irq/handle.c             |    6 ++++++
>  kernel/trace/events.c           |    2 ++
>  4 files changed, 34 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/irq.h
>  create mode 100644 include/trace/irq_event_types.h
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..ff5d449
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,9 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +#include <trace/irq_event_types.h>
> +
> +#endif
> diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
> new file mode 100644
> index 0000000..5889bfd
> --- /dev/null
> +++ b/include/trace/irq_event_types.h
> @@ -0,0 +1,17 @@
> +
> +/* use <trace/irq.h> instead */
> +#ifndef DEFINE_TRACE_FMT
> +# error Do not include this file directly.
> +# error Unless you know what you are doing.
> +#endif
> +
> +DEFINE_TRACE_FMT(irq_handler_entry,


I've renamed DEFINE_TRACE_FMT to TRACE_FORMAT. I believe that is already 
in Ingo's tree.

Other than that:

Acked-by: Steven Rostedt <srostedt@redhat.com>

Thanks,

-- Steve


> +	TPPROTO(int irq, struct irqaction *action),
> +	TPARGS(irq, action),
> +	TPFMT("irq=%d handler=%s", irq, action->name));
> +
> +DEFINE_TRACE_FMT(irq_handler_exit,
> +	TPPROTO(int irq, struct irqaction *action, int ret),
> +	TPARGS(irq, action, ret),
> +	TPFMT("irq=%d handler=%s return=%s",
> +		irq, action->name, ret ? "handled" : "unhandled"));
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..bcebc2e 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_handler_entry);
> +DEFINE_TRACE(irq_handler_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_handler_entry(irq, action);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_handler_exit(irq, action, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/events.c b/kernel/trace/events.c
> index 38c89ee..3c75623 100644
> --- a/kernel/trace/events.c
> +++ b/kernel/trace/events.c
> @@ -6,8 +6,10 @@
>  
>  /* trace/<type>.h here */
>  #include <trace/sched.h>
> +#include <trace/irq.h>
>  
>  #include "trace_events.h"
>  
>  /* trace/<type>_event_types.h here */
>  #include <trace/sched_event_types.h>
> +#include <trace/irq_event_types.h>
> 
> 
> 
> 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 15:32                       ` Steven Rostedt
@ 2009-02-26 15:35                         ` Ingo Molnar
  0 siblings, 0 replies; 48+ messages in thread
From: Ingo Molnar @ 2009-02-26 15:35 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Jason Baron, Mathieu Desnoyers, Masami Hiramatsu,
	KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, linux-kernel,
	acme, fweisbec


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 26 Feb 2009, Jason Baron wrote:
> > 
> > maybe...although that would require re-arranging the 'while' loop in
> > 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will
> > require an extra branch check, and then we still have to record the last 'ret'
> > value. I'm not that keen on re-arranging this for trace data...
> > 
> > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
> > as follows using the original two tracepoints (patch below):
> > 
> >  3)               |                      handle_IRQ_event() {
> >  3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
> >  3)               |                        e1000_intr_msi() {
> >  3)   2.460 us    |                          __napi_schedule();
> >  3)   9.416 us    |                        }
> >  3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
> >  3) + 22.935 us   |                      }
> > 
> > thanks,
> > 
> > -Jason
> 
> Looks good, one minor note:

[...]
> > +DEFINE_TRACE_FMT(irq_handler_entry,
> 
> I've renamed DEFINE_TRACE_FMT to TRACE_FORMAT. I believe that 
> is already in Ingo's tree.
> 
> Other than that:
> 
> Acked-by: Steven Rostedt <srostedt@redhat.com>

i've changed that in the patch and have applied Jason's patch to 
tip:tracing/ftrace. Thanks guys!

	Ingo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 15:11                     ` Jason Baron
  2009-02-26 15:32                       ` Steven Rostedt
@ 2009-02-26 15:40                       ` Peter Zijlstra
  2009-02-26 16:20                       ` Frederic Weisbecker
  2009-02-27  3:35                       ` KOSAKI Motohiro
  3 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-26 15:40 UTC (permalink / raw)
  To: Jason Baron
  Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro,
	Frank Ch. Eigler, rostedt, linux-kernel, acme, fweisbec

On Thu, 2009-02-26 at 10:11 -0500, Jason Baron wrote:
> 
> Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
> as follows using the original two tracepoints (patch below):
> 
>  3)               |                      handle_IRQ_event() {
>  3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
>  3)               |                        e1000_intr_msi() {
>  3)   2.460 us    |                          __napi_schedule();
>  3)   9.416 us    |                        }
>  3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
>  3) + 22.935 us   |                      }
> 
> thanks,
> 
> -Jason
> 
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>

Looks good, thanks Jason!

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 15:11                     ` Jason Baron
  2009-02-26 15:32                       ` Steven Rostedt
  2009-02-26 15:40                       ` Peter Zijlstra
@ 2009-02-26 16:20                       ` Frederic Weisbecker
  2009-02-27  3:35                       ` KOSAKI Motohiro
  3 siblings, 0 replies; 48+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 16:20 UTC (permalink / raw)
  To: Jason Baron
  Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro,
	Peter Zijlstra, Frank Ch. Eigler, rostedt, linux-kernel, acme

On Thu, Feb 26, 2009 at 10:11:05AM -0500, Jason Baron wrote:
> On Wed, Feb 25, 2009 at 12:34:12PM -0500, Mathieu Desnoyers wrote:
> > * Jason Baron (jbaron@redhat.com) wrote:
> > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote:
> > > > KOSAKI Motohiro wrote:
> > > > >>  /**
> > > > >>   * handle_IRQ_event - irq action chain handler
> > > > >>   * @irq:	the interrupt number
> > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> > > > >>  		local_irq_enable_in_hardirq();
> > > > >>  
> > > > >>  	do {
> > > > >> +		trace_irq_entry(irq);
> > > > >>  		ret = action->handler(irq, action->dev_id);
> > > > >> +		trace_irq_exit(irq, ret);
> > > > >>  		if (ret == IRQ_HANDLED)
> > > > >>  			status |= action->flags;
> > > > >>  		retval |= ret;
> > > > > 
> > > > > Nobdy want unnecessary redundant tracepoint.
> > > > > Please discuss with mathieu, and merge his tracepoint.
> > > > 
> > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> > > > is enough. However, from the same viewpoint, it should pass irq-number
> > > > to irq-exit event too, because we may lost some previous events by buffer-overflow
> > > > etc.
> > > > 
> > > >          trace_irq_entry(irq, NULL);
> > > >          ret = _handle_IRQ_event(irq, action);
> > > >          trace_irq_exit(irq, ret);
> > > >                         ^^^^
> > > > 
> > > 
> > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > > different places. So the above suggested irq tracepoint provides the
> > > same information with 4 less tracepoints in the code. So I believe its
> > > simpler - plus we can understand which action handlers are handling the
> > > interrupt.
> > > 
> > 
> > The main thing I dislike about only tracing action->handler() calls is
> > that you are not tracing an IRQ per se, but rather the invocation of a
> > given handler within the interrupt. For instance, it would be difficult
> > to calculate the maximum interrupt latency for a given interrupt line,
> > because you don't have the "real" irq entry/exit events, just the
> > individual handler() calls.
> > 
> > But I agree that knowing which handler is called is important.
> > 
> > How about this compromise :
> > 
> > trace_irq_entry(irq, action)
> >   _handle_IRQ_event()
> >     for each action  {
> >       trace_irq_handler(action, ret);
> >       ret = action->handler(irq, action->dev_id);
> >       ...
> >     }
> > trace_irq_exit(action_ret);
> > 
> > Would that give you the information you need ?
> > 
> > Here trace_irq_handler would be passed the _current_ action invoked and
> > the _previous_ action return value. Note that we should initialize
> > irqreturn_t ret to some initial value if we do this. That should keep
> > the tracing overhead minimal.
> > 
> 
> maybe...although that would require re-arranging the 'while' loop in
> 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will
> require an extra branch check, and then we still have to record the last 'ret'
> value. I'm not that keen on re-arranging this for trace data...
> 
> Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
> as follows using the original two tracepoints (patch below):
> 
>  3)               |                      handle_IRQ_event() {
>  3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
>  3)               |                        e1000_intr_msi() {
>  3)   2.460 us    |                          __napi_schedule();
>  3)   9.416 us    |                        }
>  3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
>  3) + 22.935 us   |                      }
> 
> thanks,
> 
> -Jason
> 


I'm impressed by this new TRACE_FMT system.
It means that I will just need to toggle a value on a /debug/trace/events/irq_stuff/enable
to have the useful informations as comments inside a trace, or in a whole dedicated traces.

I've played with it a part of the night to test the bprintk patch, this is awesome!


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 17:19                   ` Masami Hiramatsu
@ 2009-02-27  3:08                     ` KOSAKI Motohiro
  0 siblings, 0 replies; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  3:08 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Mathieu Desnoyers, Jason Baron, Peter Zijlstra,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec

> >>>>  /**
> >>>>   * handle_IRQ_event - irq action chain handler
> >>>>   * @irq:	the interrupt number
> >>>> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
> >>>>  		local_irq_enable_in_hardirq();
> >>>>  
> >>>>  	do {
> >>>> +		trace_irq_entry(irq);
> >>>>  		ret = action->handler(irq, action->dev_id);
> >>>> +		trace_irq_exit(irq, ret);
> >>>>  		if (ret == IRQ_HANDLED)
> >>>>  			status |= action->flags;
> >>>>  		retval |= ret;
> >>> Nobdy want unnecessary redundant tracepoint.
> >>> Please discuss with mathieu, and merge his tracepoint.
> >> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint
> >> is enough. However, from the same viewpoint, it should pass irq-number
> >> to irq-exit event too, because we may lost some previous events by buffer-overflow
> >> etc.
> >>
> >>          trace_irq_entry(irq, NULL);
> >>          ret = _handle_IRQ_event(irq, action);
> >>          trace_irq_exit(irq, ret);
> >>                         ^^^^
> >>
> > 
> > I seriously doubt we should consider a trace with missing events as
> > "reliable". If your only argument is that when the buffers are not large
> > enough we could lose events, then I think we should just hint people at
> > doing the right thing, which is to tweak the tracer parameters (e.g.
> > larger buffers) so they stop losing events.
> 
> Hi Mathieu,
> 
> I think it depends on what events we'd like to trace on what environment
> (not have so much memories).
> Someone(like me) may want "reliability" for those events, someone not.

I don't think this viewpoint is valueable.
buffer overflow problem depend on the number of AVERAGE output from tracer.
typically, one irq has only one irq handler. 

if this change makes buffer overflow, the tracer buffer management is too silly.
Just change it.



> > A trace with events lost is really a scenario close to a corrupted
> > trace because we don't know which event has been lost, nor where. I
> > don't think we should increase the event size to support that kind of
> > broken scenario.
> 
> Why LTTng doesn't filter those unused arguments inside their handlers?
> Is there no room for compromise on sharing tracepoints with other
> tracers? IMHO, tracepoints in the kernel should be a common
> infrastructure for many users.

Agreed. last mathieu's mail only talk about lttng issue.
I can imazine systemtap folks need explicit irq number argument.



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 12:37                               ` Dominique Toupin
@ 2009-02-27  3:14                                 ` KOSAKI Motohiro
  2009-02-27  3:29                                   ` Steven Rostedt
                                                     ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  3:14 UTC (permalink / raw)
  To: Dominique Toupin
  Cc: kosaki.motohiro, Steven Rostedt, Mathieu Desnoyers,
	Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme

> > > Given this scenario :
> > > 
> > > A telecommunication system runs, but the client notices 
> > something wrong.
> > > They call their service provider. The provider enables tracing 
> > > _remotely_ on the _production system_ while it's _active in 
> > the field_.
> > > 
> > > Bam, those few milliseconds interrupt latencies become unacceptable.
> > > 
> > > Hopefully this scenario makes the use-case clearer. The 
> > problem is not 
> > > that interrupt latencies would occur while tracing is on, 
> > but rather 
> > > that it would happen on a running production system when switching 
> > > tracing on. This is what is totally unacceptable for this use-case.
> > > 
> > > For more details about such requirements, I'm CCing 
> > Dominique Toupin 
> > > from Ericsson who I'm sure would be happy to give more 
> > details about 
> > > this if needed.
> > 
> > Hmm, so this system in the field is running Linux with the 
> > Real-Time Patch?  Because if it isn't it will suffer from 
> > millisecond latencies in normal operation.
>
> In many cases we don't use Linux real-time, we have many systems that
> are soft-real-time an non real-time Linux is good enough.
>  

Agreed, rt-patch seems off topics. we discuss to mainline kernel.

Steven, I sitll think nobody use ftrace on production system now yet.
Do you know actual production user?





^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  3:14                                 ` KOSAKI Motohiro
@ 2009-02-27  3:29                                   ` Steven Rostedt
  2009-02-27  3:36                                   ` Steven Rostedt
  2009-02-27  7:23                                   ` Peter Zijlstra
  2 siblings, 0 replies; 48+ messages in thread
From: Steven Rostedt @ 2009-02-27  3:29 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker,
	Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler,
	mingo, linux-kernel, acme


On Fri, 27 Feb 2009, KOSAKI Motohiro wrote:
> > > > Given this scenario :
> > > > 
> > > > A telecommunication system runs, but the client notices 
> > > something wrong.
> > > > They call their service provider. The provider enables tracing 
> > > > _remotely_ on the _production system_ while it's _active in 
> > > the field_.
> > > > 
> > > > Bam, those few milliseconds interrupt latencies become unacceptable.
> > > > 
> > > > Hopefully this scenario makes the use-case clearer. The 
> > > problem is not 
> > > > that interrupt latencies would occur while tracing is on, 
> > > but rather 
> > > > that it would happen on a running production system when switching 
> > > > tracing on. This is what is totally unacceptable for this use-case.
> > > > 
> > > > For more details about such requirements, I'm CCing 
> > > Dominique Toupin 
> > > > from Ericsson who I'm sure would be happy to give more 
> > > details about 
> > > > this if needed.
> > > 
> > > Hmm, so this system in the field is running Linux with the 
> > > Real-Time Patch?  Because if it isn't it will suffer from 
> > > millisecond latencies in normal operation.
> >
> > In many cases we don't use Linux real-time, we have many systems that
> > are soft-real-time an non real-time Linux is good enough.
> >  
> 
> Agreed, rt-patch seems off topics. we discuss to mainline kernel.
> 
> Steven, I sitll think nobody use ftrace on production system now yet.
> Do you know actual production user?

First lets separate out "ftrace" and the function tracer. ftrace is the 
infrastructure that has schedule tracing, event tracing and lots and lots 
of other kinds of tracing.

The "stop_machine" talked about in this thread is specific for 
dynamic_ftrace (which is used for the function and graph tracers).

Your question about "production user" is a little unfair, since function 
tracing did not appear till 2.6.28. (I'm ignoring 2.6.27, since the
version of the function tracer there had a notorious bug).

No distribution is shipping a production 2.6.28 AFIAK. Which means, I do 
not know of any production use of the function tracer. I have already 
recommended that people enable the function tracer if they are using 
2.6.28 in a production environment. Given some time, when 2.6.28 is more 
in use, my answer would probably be "Yes, I do know it is used in a 
production environment".

-- Steve


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-25 17:34                   ` Mathieu Desnoyers
  2009-02-25 18:05                     ` Steven Rostedt
  2009-02-26 15:11                     ` Jason Baron
@ 2009-02-27  3:33                     ` KOSAKI Motohiro
  2009-02-27  7:25                       ` Peter Zijlstra
  2 siblings, 1 reply; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  3:33 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: kosaki.motohiro, Jason Baron, Masami Hiramatsu, Peter Zijlstra,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec

> > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3
> > different places. So the above suggested irq tracepoint provides the
> > same information with 4 less tracepoints in the code. So I believe its
> > simpler - plus we can understand which action handlers are handling the
> > interrupt.
> > 
> 
> The main thing I dislike about only tracing action->handler() calls is
> that you are not tracing an IRQ per se, but rather the invocation of a
> given handler within the interrupt. For instance, it would be difficult
> to calculate the maximum interrupt latency for a given interrupt line,
> because you don't have the "real" irq entry/exit events, just the
> individual handler() calls.

I agree with IRQ latency tracing is very important.
So now, We understand we have another two good requirement.

Therefore, I can agree to Mathieu's separete trace point idea and
Jason's current patch.

Thanks! good discussion.



> 
> But I agree that knowing which handler is called is important.
> 
> How about this compromise :
> 
> trace_irq_entry(irq, action)
>   _handle_IRQ_event()
>     for each action  {
>       trace_irq_handler(action, ret);
>       ret = action->handler(irq, action->dev_id);
>       ...
>     }
> trace_irq_exit(action_ret);
> 
> Would that give you the information you need ?
> 
> Here trace_irq_handler would be passed the _current_ action invoked and
> the _previous_ action return value. Note that we should initialize
> irqreturn_t ret to some initial value if we do this. That should keep
> the tracing overhead minimal.






^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-26 15:11                     ` Jason Baron
                                         ` (2 preceding siblings ...)
  2009-02-26 16:20                       ` Frederic Weisbecker
@ 2009-02-27  3:35                       ` KOSAKI Motohiro
  3 siblings, 0 replies; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  3:35 UTC (permalink / raw)
  To: Jason Baron
  Cc: kosaki.motohiro, Mathieu Desnoyers, mingo, Masami Hiramatsu,
	Peter Zijlstra, Frank Ch. Eigler, rostedt, linux-kernel, acme,
	fweisbec


> Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace
> as follows using the original two tracepoints (patch below):
> 
>  3)               |                      handle_IRQ_event() {
>  3)               |                        /* (irq_handler_entry) irq=28 handler=eth0 */
>  3)               |                        e1000_intr_msi() {
>  3)   2.460 us    |                          __napi_schedule();
>  3)   9.416 us    |                        }
>  3)               |                        /* (irq_handler_exit) irq=28 handler=eth0 return=handled */
>  3) + 22.935 us   |                      }
> 
> thanks,
> 
> -Jason

OK.
I think you prove this patch is valuable enough. very thanks!

	Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>


> 
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> ---
> 
>  include/trace/irq.h             |    9 +++++++++
>  include/trace/irq_event_types.h |   17 +++++++++++++++++
>  kernel/irq/handle.c             |    6 ++++++
>  kernel/trace/events.c           |    2 ++
>  4 files changed, 34 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/irq.h
>  create mode 100644 include/trace/irq_event_types.h
> 
> 
> diff --git a/include/trace/irq.h b/include/trace/irq.h
> new file mode 100644
> index 0000000..ff5d449
> --- /dev/null
> +++ b/include/trace/irq.h
> @@ -0,0 +1,9 @@
> +#ifndef _TRACE_IRQ_H
> +#define _TRACE_IRQ_H
> +
> +#include <linux/interrupt.h>
> +#include <linux/tracepoint.h>
> +
> +#include <trace/irq_event_types.h>
> +
> +#endif
> diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
> new file mode 100644
> index 0000000..5889bfd
> --- /dev/null
> +++ b/include/trace/irq_event_types.h
> @@ -0,0 +1,17 @@
> +
> +/* use <trace/irq.h> instead */
> +#ifndef DEFINE_TRACE_FMT
> +# error Do not include this file directly.
> +# error Unless you know what you are doing.
> +#endif
> +
> +DEFINE_TRACE_FMT(irq_handler_entry,
> +	TPPROTO(int irq, struct irqaction *action),
> +	TPARGS(irq, action),
> +	TPFMT("irq=%d handler=%s", irq, action->name));
> +
> +DEFINE_TRACE_FMT(irq_handler_exit,
> +	TPPROTO(int irq, struct irqaction *action, int ret),
> +	TPARGS(irq, action, ret),
> +	TPFMT("irq=%d handler=%s return=%s",
> +		irq, action->name, ret ? "handled" : "unhandled"));
> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
> index fdff380..bcebc2e 100644
> --- a/kernel/irq/handle.c
> +++ b/kernel/irq/handle.c
> @@ -18,6 +18,7 @@
>  #include <linux/rculist.h>
>  #include <linux/hash.h>
>  #include <linux/bootmem.h>
> +#include <trace/irq.h>
>  
>  #include "internals.h"
>  
> @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
>  	return IRQ_NONE;
>  }
>  
> +DEFINE_TRACE(irq_handler_entry);
> +DEFINE_TRACE(irq_handler_exit);
> +
>  /**
>   * handle_IRQ_event - irq action chain handler
>   * @irq:	the interrupt number
> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
>  		local_irq_enable_in_hardirq();
>  
>  	do {
> +		trace_irq_handler_entry(irq, action);
>  		ret = action->handler(irq, action->dev_id);
> +		trace_irq_handler_exit(irq, action, ret);
>  		if (ret == IRQ_HANDLED)
>  			status |= action->flags;
>  		retval |= ret;
> diff --git a/kernel/trace/events.c b/kernel/trace/events.c
> index 38c89ee..3c75623 100644
> --- a/kernel/trace/events.c
> +++ b/kernel/trace/events.c
> @@ -6,8 +6,10 @@
>  
>  /* trace/<type>.h here */
>  #include <trace/sched.h>
> +#include <trace/irq.h>
>  
>  #include "trace_events.h"
>  
>  /* trace/<type>_event_types.h here */
>  #include <trace/sched_event_types.h>
> +#include <trace/irq_event_types.h>
> 
> 
> 
> 
> 
> 




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  3:14                                 ` KOSAKI Motohiro
  2009-02-27  3:29                                   ` Steven Rostedt
@ 2009-02-27  3:36                                   ` Steven Rostedt
  2009-02-27  7:48                                     ` KOSAKI Motohiro
  2009-02-27  7:23                                   ` Peter Zijlstra
  2 siblings, 1 reply; 48+ messages in thread
From: Steven Rostedt @ 2009-02-27  3:36 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker,
	Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler,
	mingo, linux-kernel, acme


On Fri, 27 Feb 2009, KOSAKI Motohiro wrote:
> >
> > In many cases we don't use Linux real-time, we have many systems that
> > are soft-real-time an non real-time Linux is good enough.
> >  
> 
> Agreed, rt-patch seems off topics. we discuss to mainline kernel.

The reason I brought up the rt-patch is that Mathieu is concerned about 
the small latency that happens when we run stop_machine to switch the nops 
to pointers to trace functions. This latency can be a couple of millisecs, 
but I can easily make a normal kernel suffer a couple of millisec latency 
by simply running hackbench (as a normal user).

The point being, I think Mathieu's point about the latency of enabling the 
function tracer is frivolous. It is a one time shot that happens when the 
sysadmin purposely enables the function tracing.  If this is unacceptable, 
then the system had better be running the rt patch because it will be 
hitting this unacceptable latency in normal operation.

-- Steve


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  3:14                                 ` KOSAKI Motohiro
  2009-02-27  3:29                                   ` Steven Rostedt
  2009-02-27  3:36                                   ` Steven Rostedt
@ 2009-02-27  7:23                                   ` Peter Zijlstra
  2 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-27  7:23 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Dominique Toupin, Steven Rostedt, Mathieu Desnoyers,
	Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	Frank Ch. Eigler, mingo, linux-kernel, acme

On Fri, 2009-02-27 at 12:14 +0900, KOSAKI Motohiro wrote:
> > > > Given this scenario :
> > > > 
> > > > A telecommunication system runs, but the client notices 
> > > something wrong.
> > > > They call their service provider. The provider enables tracing 
> > > > _remotely_ on the _production system_ while it's _active in 
> > > the field_.
> > > > 
> > > > Bam, those few milliseconds interrupt latencies become unacceptable.
> > > > 
> > > > Hopefully this scenario makes the use-case clearer. The 
> > > problem is not 
> > > > that interrupt latencies would occur while tracing is on, 
> > > but rather 
> > > > that it would happen on a running production system when switching 
> > > > tracing on. This is what is totally unacceptable for this use-case.
> > > > 
> > > > For more details about such requirements, I'm CCing 
> > > Dominique Toupin 
> > > > from Ericsson who I'm sure would be happy to give more 
> > > details about 
> > > > this if needed.
> > > 
> > > Hmm, so this system in the field is running Linux with the 
> > > Real-Time Patch?  Because if it isn't it will suffer from 
> > > millisecond latencies in normal operation.
> >
> > In many cases we don't use Linux real-time, we have many systems that
> > are soft-real-time an non real-time Linux is good enough.
> >  
> 
> Agreed, rt-patch seems off topics. we discuss to mainline kernel.

Exactly, hence you should not worry about ms irq-off latencies, since
mainline is happy to generate those for you, regardless of
function-trace.

The only way to complain about those is if your base system is better
than that, and currently only preempt-rt provides that.

So unless you're on preempt-rt, complaining about ms-order irq latencies
just isn't a valid argument.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  3:33                     ` KOSAKI Motohiro
@ 2009-02-27  7:25                       ` Peter Zijlstra
  0 siblings, 0 replies; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-27  7:25 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Mathieu Desnoyers, Jason Baron, Masami Hiramatsu,
	Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec

On Fri, 2009-02-27 at 12:33 +0900, KOSAKI Motohiro wrote:
> 
> I agree with IRQ latency tracing is very important.

Then use the irq-latency tracer, that thing is specifically created to
measure and locate irq-off latency sources.

We've been using it for years in the preempt-rt patch-set.


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  3:36                                   ` Steven Rostedt
@ 2009-02-27  7:48                                     ` KOSAKI Motohiro
  2009-02-27  8:06                                       ` Peter Zijlstra
  2009-02-27 14:43                                       ` Steven Rostedt
  0 siblings, 2 replies; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  7:48 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: kosaki.motohiro, Dominique Toupin, Mathieu Desnoyers,
	Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme

> 
> On Fri, 27 Feb 2009, KOSAKI Motohiro wrote:
> > >
> > > In many cases we don't use Linux real-time, we have many systems that
> > > are soft-real-time an non real-time Linux is good enough.
> > >  
> > 
> > Agreed, rt-patch seems off topics. we discuss to mainline kernel.
> 
> The reason I brought up the rt-patch is that Mathieu is concerned about 
> the small latency that happens when we run stop_machine to switch the nops 
> to pointers to trace functions. This latency can be a couple of millisecs, 
> but I can easily make a normal kernel suffer a couple of millisec latency 
> by simply running hackbench (as a normal user).
> 
> The point being, I think Mathieu's point about the latency of enabling the 
> function tracer is frivolous. It is a one time shot that happens when the 
> sysadmin purposely enables the function tracing.  If this is unacceptable, 
> then the system had better be running the rt patch because it will be 
> hitting this unacceptable latency in normal operation.

ok, I missed last thread. thanks.

I think you talk about "ftrace, x86: make kernel text writable only for
conversions" thread, right?
hm, it seems very good discussion.

My concern is, both your and Mathieu's point is fairly reasonable and
good direction.

However, I would bring up another viewpoint. so, 

  - the latency issue can happen actual production server?


Mathieu and Dominique explained ACTUAL production server problem.
but I believe Dominique's server never run hackbench.

In other word, I agree mainline kernel has many latency corner case problems.
but I don't agree corner case existing indicate no need latency improvement.
Generally, actual experience indicate it's valuable to hear them reqirement.



HOWEVER, I still also agree with your disliking messiness.
Then, I would suggest we go into next step.

I mean make the patch of mathieu's plan and mesure it. Then
we can mesure number by number. my number mean,

  (1) How much increasing line and messiness
  (2) How much improving latency

that's fairly comparision and typical lkml discussion style, imho.

What do you think?




^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  7:48                                     ` KOSAKI Motohiro
@ 2009-02-27  8:06                                       ` Peter Zijlstra
  2009-02-27  8:13                                         ` KOSAKI Motohiro
  2009-02-27 14:43                                       ` Steven Rostedt
  1 sibling, 1 reply; 48+ messages in thread
From: Peter Zijlstra @ 2009-02-27  8:06 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Steven Rostedt, Dominique Toupin, Mathieu Desnoyers,
	Frederic Weisbecker, Jason Baron, Masami Hiramatsu,
	Frank Ch. Eigler, mingo, linux-kernel, acme

On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote:

> I mean make the patch of mathieu's plan and mesure it. Then
> we can mesure number by number. my number mean,
> 
>   (1) How much increasing line and messiness
>   (2) How much improving latency
> 
> that's fairly comparision and typical lkml discussion style, imho.
> 
> What do you think?

Patches are welcome ;-)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  8:06                                       ` Peter Zijlstra
@ 2009-02-27  8:13                                         ` KOSAKI Motohiro
  2009-02-27 13:10                                           ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 48+ messages in thread
From: KOSAKI Motohiro @ 2009-02-27  8:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kosaki.motohiro, Steven Rostedt, Dominique Toupin,
	Mathieu Desnoyers, Frederic Weisbecker, Jason Baron,
	Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel, acme

> On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote:
> 
> > I mean make the patch of mathieu's plan and mesure it. Then
> > we can mesure number by number. my number mean,
> > 
> >   (1) How much increasing line and messiness
> >   (2) How much improving latency
> > 
> > that's fairly comparision and typical lkml discussion style, imho.
> > 
> > What do you think?
> 
> Patches are welcome ;-)

Why sad face? Am I talking about funny thing?



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  8:13                                         ` KOSAKI Motohiro
@ 2009-02-27 13:10                                           ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 48+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-02-27 13:10 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Peter Zijlstra, Steven Rostedt, Dominique Toupin,
	Mathieu Desnoyers, Frederic Weisbecker, Jason Baron,
	Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel

On Fri, Feb 27, 2009 at 5:13 AM, KOSAKI Motohiro
<kosaki.motohiro@jp.fujitsu.com> wrote:
>> On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote:
>>
>> > I mean make the patch of mathieu's plan and mesure it. Then
>> > we can mesure number by number. my number mean,
>> >
>> >   (1) How much increasing line and messiness
>> >   (2) How much improving latency
>> >
>> > that's fairly comparision and typical lkml discussion style, imho.
>> >
>> > What do you think?
>>
>> Patches are welcome ;-)
>
> Why sad face? Am I talking about funny thing?

Nope, its just that the way you're talking is looking too managerial.

IOW, instead of proposing measurements, do them and show the results,
how does that sound?

And ;-) is not a sad face in my dictionary, but an ironic one :-)

- Arnaldo

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH] new irq tracer
  2009-02-27  7:48                                     ` KOSAKI Motohiro
  2009-02-27  8:06                                       ` Peter Zijlstra
@ 2009-02-27 14:43                                       ` Steven Rostedt
  1 sibling, 0 replies; 48+ messages in thread
From: Steven Rostedt @ 2009-02-27 14:43 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker,
	Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler,
	Ingo Molnar, LKML, Arnaldo Carvalho de Melo


On Fri, 27 Feb 2009, KOSAKI Motohiro wrote:

> > 
> > On Fri, 27 Feb 2009, KOSAKI Motohiro wrote:
> > > >
> > > > In many cases we don't use Linux real-time, we have many systems that
> > > > are soft-real-time an non real-time Linux is good enough.
> > > >  
> > > 
> > > Agreed, rt-patch seems off topics. we discuss to mainline kernel.
> > 
> > The reason I brought up the rt-patch is that Mathieu is concerned about 
> > the small latency that happens when we run stop_machine to switch the nops 
> > to pointers to trace functions. This latency can be a couple of millisecs, 
> > but I can easily make a normal kernel suffer a couple of millisec latency 
> > by simply running hackbench (as a normal user).
> > 
> > The point being, I think Mathieu's point about the latency of enabling the 
> > function tracer is frivolous. It is a one time shot that happens when the 
> > sysadmin purposely enables the function tracing.  If this is unacceptable, 
> > then the system had better be running the rt patch because it will be 
> > hitting this unacceptable latency in normal operation.
> 
> ok, I missed last thread. thanks.
> 
> I think you talk about "ftrace, x86: make kernel text writable only for
> conversions" thread, right?
> hm, it seems very good discussion.

Yes, and as Mathieu and I finally convinced each other that there are
two different issues there (we both were trying to convince the other
about the same point. Sorta like the two blind man explaining what an 
Elephant looks like ;-)

  text_poke vs touching kernel mappings

  break_points vs stop_machine

The real issue came down to the latter.

> 
> My concern is, both your and Mathieu's point is fairly reasonable and
> good direction.
> 
> However, I would bring up another viewpoint. so, 
> 
>   - the latency issue can happen actual production server?
> 
> 
> Mathieu and Dominique explained ACTUAL production server problem.
> but I believe Dominique's server never run hackbench.
> 
> In other word, I agree mainline kernel has many latency corner case problems.
> but I don't agree corner case existing indicate no need latency improvement.
> Generally, actual experience indicate it's valuable to hear them requirement.

Then the production system had better not do any load at all. hackbench is 
a trivial way to produce the problem, and I used it only as an example 
that one can see. But anytime you have a large quantity of requests coming 
in, and waking up many threads, you run the risk of taking a 3 or 4ms 
interrupts off latency.  I've measured this running database applications.

Using 2.6.28, I did some measurements of the function tracer stop_machine.
Simply by using irqsoff tracer:

 # wc -l available_filter_functions
19043

That is 19 thousand available trace points in the kernel. All of them are
nops when tracing is disabled. But to activate all of them by running 
stop_machine can take a bit of time. It will also add a bit of overhead to
the system when tracing is active simply because you have 19 thousand 
trace points!

 # cd /debug/tracing
 # echo irqsoff > current_tracer
 # sysctl kernel.ftrace_enabled
kernel.ftrace_enabled = 1

 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=0
kernel.ftrace_enabled = 0

 # cat latency_trace > full-disable.txt
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=1
kernel.ftrace_enabled = 1

 # cat latency_trace > full-enable.txt
 # echo do_fork > set_ftrace_filter
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=0
kernel.ftrace_enabled = 0
 
 # cat latency_trace > one-func-disable.txt
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=1
kernel.ftrace_enabled = 1

 # cat latency_trace > one-func-enable.txt
 # head available_filter_functions > set_ftrace_filter
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=0
kernel.ftrace_enabled = 0
 
 # cat latency_trace > ten-func-disable.txt
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=1
kernel.ftrace_enabled = 1

 # cat latency_trace > ten-func-enable.txt
 # head -100 available_filter_functions > set_ftrace_filter
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=0
kernel.ftrace_enabled = 0
 
 # cat latency_trace > hundred-func-disable.txt
 # echo 0 > tracing_max_latency
 # sysctl kernel.ftrace_enabled=1
kernel.ftrace_enabled = 1

 # cat latency_trace > hundred-func-enable.txt

 # sysctrl kernel.ftrace_enabled=0
kernel.ftrace_enabled = 0

 # echo 0 > tracing_max_latency
 # /usr/local/bin/hackbench 50
Time: 18.693

 # cat latency_trace > hackbench.txt

The above output traces and the available_filter_functions can be found
at http://rostedt.homelinux.com/traces/

The actual measured latencies are up on the top left corner. The trace
itself might be a little off because it is still recording the trace
after it recorded the latency, and you might see a few more microsecs
in the trace output.

Here's the latencies:

 one-disable:		564us
 one-enable:		567us

 ten-disable:		547us
 ten-enable:		549us

Funny how enabling 10 took less than one? Perhaps cache issues.

 hundred-disable:	580us
 hundred-enable:	581us

 full-disable: (19043)	9352us
 full-enable:  (19043)	19081us

 hackbench:		3859us

For some reason, full enabling always seems to take longer than full
disabling. Not sure why.

Now lets look at this data. You can enable 100 different call sites in 
just over a half a millisec.  The latency that Mathieu is complaining 
about is just a half a millisec.  That I can guarantee you, a production 
system doing any kind of work, will hit that latency, with any non RT 
Linux kernel.

Yes, you could say, "19ms! That is a lot!" Well, do not enable all
19 thousand trace points at once.  Just having them enabled will cause a 
bit of overhead that is noticeable. Mathieu pointed out that I said I 
would not have this done on an active production system. He is right. 
Although if something is going wrong with the production system, it may be 
worth taking this hit to analyze the situation. Or ever a periodical thing 
on off hours with a notice to customers that analysis of the system is 
being performed from (2am to 3am EST), expect some degradation in 
performance. You can do this. It is a controlled environment.

> 
> HOWEVER, I still also agree with your disliking messiness.
> Then, I would suggest we go into next step.
> 
> I mean make the patch of Mathieu's plan and measure it. Then
> we can mesure number by number. my number mean,
> 
>   (1) How much increasing line and messiness
>   (2) How much improving latency
> 
> that's fairly comparision and typical lkml discussion style, imho.
> 
> What do you think?

The problem with the above is that to replace stop_machine, I would need 
to implement the breakpoint method. This is by no means a trivial change. 
It may take weeks to get right. And for what? To get rid of a half a 
millisec latency that can happen at any time during normal operation?

Note, the stop_machine method can be ported to other archs trivially. 
David Miller ported it in 20 minutes to Sparc64. A break point scheme 
would take a lot more development by each arch maintainer, and risk adding 
many more bugs.

-- Steve


^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2009-02-27 14:43 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-18 19:53 [PATCH] new irq tracer Jason Baron
2009-02-18 20:08 ` Steven Rostedt
2009-02-18 20:26 ` Mathieu Desnoyers
2009-02-19  1:42   ` KOSAKI Motohiro
2009-02-18 20:30 ` Kyle McMartin
2009-02-18 21:15 ` Peter Zijlstra
2009-02-18 21:35   ` Jason Baron
2009-02-18 21:46     ` Peter Zijlstra
2009-02-18 22:02       ` Frank Ch. Eigler
2009-02-18 22:10         ` Peter Zijlstra
2009-02-18 22:23           ` Frank Ch. Eigler
2009-02-18 23:21             ` Peter Zijlstra
2009-02-20 19:52           ` Jason Baron
2009-02-21  3:39             ` Frederic Weisbecker
2009-02-22  3:38             ` KOSAKI Motohiro
2009-02-25 16:48               ` Masami Hiramatsu
2009-02-25 16:57                 ` Jason Baron
2009-02-25 17:34                   ` Mathieu Desnoyers
2009-02-25 18:05                     ` Steven Rostedt
2009-02-25 22:12                       ` Mathieu Desnoyers
2009-02-25 22:20                         ` Frederic Weisbecker
2009-02-25 23:13                           ` Mathieu Desnoyers
2009-02-26  1:41                             ` Steven Rostedt
2009-02-26 12:37                               ` Dominique Toupin
2009-02-27  3:14                                 ` KOSAKI Motohiro
2009-02-27  3:29                                   ` Steven Rostedt
2009-02-27  3:36                                   ` Steven Rostedt
2009-02-27  7:48                                     ` KOSAKI Motohiro
2009-02-27  8:06                                       ` Peter Zijlstra
2009-02-27  8:13                                         ` KOSAKI Motohiro
2009-02-27 13:10                                           ` Arnaldo Carvalho de Melo
2009-02-27 14:43                                       ` Steven Rostedt
2009-02-27  7:23                                   ` Peter Zijlstra
2009-02-25 22:21                         ` Steven Rostedt
2009-02-26 15:11                     ` Jason Baron
2009-02-26 15:32                       ` Steven Rostedt
2009-02-26 15:35                         ` Ingo Molnar
2009-02-26 15:40                       ` Peter Zijlstra
2009-02-26 16:20                       ` Frederic Weisbecker
2009-02-27  3:35                       ` KOSAKI Motohiro
2009-02-27  3:33                     ` KOSAKI Motohiro
2009-02-27  7:25                       ` Peter Zijlstra
2009-02-25 16:58                 ` Mathieu Desnoyers
2009-02-25 17:19                   ` Masami Hiramatsu
2009-02-27  3:08                     ` KOSAKI Motohiro
2009-02-18 23:34       ` Kyle McMartin
2009-02-19  2:13       ` Frederic Weisbecker
2009-02-19  1:46   ` Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).