[RFC] tracing: Add support for critical section event tracing
diff mbox series

Message ID 20170412053851.26286-1-joelaf@google.com
State New, archived
Headers show
Series
  • [RFC] tracing: Add support for critical section event tracing
Related show

Commit Message

Joel Fernandes April 12, 2017, 5:38 a.m. UTC
Critical section trace events can be used for tracing the start and
end of a critical section which can be used by a trace viewer like
systrace to graphically view the start and end of a critical section and
correlate them with latencies and scheduling issues.

Reason for starting critical section:
Either preemption or interrupts were turned off.
Reason for ending critical section:
Both preemption and interrupts are now turned back on.

Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Joel Fernandes <joelaf@google.com>
--
Here's an example of how Android's systrace will be using it to show critical
sections as a gantt chart: http://imgur.com/download/TZplEVp
---
 include/trace/events/critical.h | 55 +++++++++++++++++++++++++++++++
 kernel/trace/Kconfig            |  6 ++++
 kernel/trace/trace_irqsoff.c    | 73 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 include/trace/events/critical.h

Comments

Joel Fernandes June 30, 2017, 5:17 a.m. UTC | #1
Hi Steven,

Did you have any comments about this patch? It was sent a while ago
and if you can provide me your initial thoughts on it, I would
appreciate it. (Sorry to ping you about it during the busy merge
window time, but I was thinking if I can get any initial early
comments from you then I can get to work on it and finish in time well
before the next merge window).

Regards,
Joel

On Tue, Apr 11, 2017 at 10:38 PM, Joel Fernandes <joelaf@google.com> wrote:
> Critical section trace events can be used for tracing the start and
> end of a critical section which can be used by a trace viewer like
> systrace to graphically view the start and end of a critical section and
> correlate them with latencies and scheduling issues.
>
> Reason for starting critical section:
> Either preemption or interrupts were turned off.
> Reason for ending critical section:
> Both preemption and interrupts are now turned back on.
>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: Joel Fernandes <joelaf@google.com>
> --
> Here's an example of how Android's systrace will be using it to show critical
> sections as a gantt chart: http://imgur.com/download/TZplEVp
> ---
>  include/trace/events/critical.h | 55 +++++++++++++++++++++++++++++++
>  kernel/trace/Kconfig            |  6 ++++
>  kernel/trace/trace_irqsoff.c    | 73 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 134 insertions(+)
>  create mode 100644 include/trace/events/critical.h
>
> diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
> new file mode 100644
> index 000000000000..bfd58dd4f48f
> --- /dev/null
> +++ b/include/trace/events/critical.h
> @@ -0,0 +1,55 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM critical
> +
> +#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CRITICAL_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +#include <linux/string.h>
> +
> +TRACE_EVENT(critical_start,
> +
> +       TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> +       TP_ARGS(ip, parent_ip),
> +
> +       TP_STRUCT__entry(
> +               __array(char, func, 16)
> +               __array(char, parent, 16)
> +       ),
> +
> +       TP_fast_assign(
> +               snprintf(__entry->func, 16, "%pf", (void *)ip);
> +               snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
> +       ),
> +
> +       TP_printk("caller=%s parent=%s\n",
> +                 __entry->func,
> +                 __entry->parent)
> +);
> +
> +TRACE_EVENT(critical_end,
> +
> +       TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> +       TP_ARGS(ip, parent_ip),
> +
> +       TP_STRUCT__entry(
> +               __array(char, func, 16)
> +               __array(char, parent, 16)
> +       ),
> +
> +       TP_fast_assign(
> +               snprintf(__entry->func, 16, "%pf", (void *)ip);
> +               snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
> +       ),
> +
> +       TP_printk("caller=%s parent=%s\n",
> +                 __entry->func,
> +                 __entry->parent)
> +);
> +#endif /* _TRACE_CRITICAL_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d5038005eb5d..986a07985360 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -208,6 +208,12 @@ config PREEMPT_TRACER
>           enabled. This option and the irqs-off timing option can be
>           used together or separately.)
>
> +config TRACE_CRITICAL_SECTION_EVENTS
> +       bool "Trace critical sections as trace events"
> +       default n
> +       select IRQSOFF_TRACER
> +       select PREEMPT_TRACER
> +
>  config SCHED_TRACER
>         bool "Scheduling Latency Tracer"
>         select GENERIC_TRACER
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 7758bc0617cb..a14ce6185885 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -13,6 +13,8 @@
>  #include <linux/uaccess.h>
>  #include <linux/module.h>
>  #include <linux/ftrace.h>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/critical.h>
>
>  #include "trace.h"
>
> @@ -20,6 +22,7 @@ static struct trace_array             *irqsoff_trace __read_mostly;
>  static int                             tracer_enabled __read_mostly;
>
>  static DEFINE_PER_CPU(int, tracing_cpu);
> +static DEFINE_PER_CPU(int, tracing_events_cpu);
>
>  static DEFINE_RAW_SPINLOCK(max_trace_lock);
>
> @@ -362,6 +365,44 @@ check_critical_timing(struct trace_array *tr,
>         __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
>  }
>
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +/*
> + * Called when either preempt or irq are turned off
> + */
> +static inline void
> +start_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> +       int cpu;
> +
> +       cpu = raw_smp_processor_id();
> +
> +       if (per_cpu(tracing_events_cpu, cpu))
> +               return;
> +
> +       trace_critical_start(ip, parent_ip);
> +
> +       per_cpu(tracing_events_cpu, cpu) = 1;
> +}
> +
> +/*
> + * Called when both preempt and irq are turned back on
> + */
> +static inline void
> +stop_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> +       int cpu;
> +
> +       cpu = raw_smp_processor_id();
> +
> +       if (unlikely(per_cpu(tracing_events_cpu, cpu)))
> +               per_cpu(tracing_events_cpu, cpu) = 0;
> +       else
> +               return;
> +
> +       trace_critical_end(ip, parent_ip);
> +}
> +#endif
> +
>  static inline void
>  start_critical_timing(unsigned long ip, unsigned long parent_ip)
>  {
> @@ -434,6 +475,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
>  /* start and stop critical timings used to for stoppage (in idle) */
>  void start_critical_timings(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (irqs_disabled() || preempt_count())
> +               start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif
>         if (preempt_trace() || irq_trace())
>                 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -441,6 +486,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
>
>  void stop_critical_timings(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (irqs_disabled() || preempt_count())
> +               stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif
>         if (preempt_trace() || irq_trace())
>                 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -483,6 +532,10 @@ inline void print_irqtrace_events(struct task_struct *curr)
>   */
>  void trace_hardirqs_on(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!preempt_count())
> +               stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif
>         if (!preempt_trace() && irq_trace())
>                 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -490,6 +543,10 @@ EXPORT_SYMBOL(trace_hardirqs_on);
>
>  void trace_hardirqs_off(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!preempt_count())
> +               start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif
>         if (!preempt_trace() && irq_trace())
>                 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -497,6 +554,10 @@ EXPORT_SYMBOL(trace_hardirqs_off);
>
>  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!preempt_count())
> +               stop_critical_event(CALLER_ADDR0, caller_addr);
> +#endif
>         if (!preempt_trace() && irq_trace())
>                 stop_critical_timing(CALLER_ADDR0, caller_addr);
>  }
> @@ -504,6 +565,10 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);
>
>  __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!preempt_count())
> +               start_critical_event(CALLER_ADDR0, caller_addr);
> +#endif
>         if (!preempt_trace() && irq_trace())
>                 start_critical_timing(CALLER_ADDR0, caller_addr);
>  }
> @@ -515,12 +580,20 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
>  #ifdef CONFIG_PREEMPT_TRACER
>  void trace_preempt_on(unsigned long a0, unsigned long a1)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!irqs_disabled())
> +               stop_critical_event(a0, a1);
> +#endif
>         if (preempt_trace() && !irq_trace())
>                 stop_critical_timing(a0, a1);
>  }
>
>  void trace_preempt_off(unsigned long a0, unsigned long a1)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +       if (!irqs_disabled())
> +               start_critical_event(a0, a1);
> +#endif
>         if (preempt_trace() && !irq_trace())
>                 start_critical_timing(a0, a1);
>  }
> --
> 2.12.2.715.g7642488e1d-goog
>
Steven Rostedt June 30, 2017, 12:32 p.m. UTC | #2
On Thu, 29 Jun 2017 22:17:22 -0700
Joel Fernandes <joelaf@google.com> wrote:

> Hi Steven,
> 
> Did you have any comments about this patch? It was sent a while ago
> and if you can provide me your initial thoughts on it, I would
> appreciate it. (Sorry to ping you about it during the busy merge
> window time, but I was thinking if I can get any initial early
> comments from you then I can get to work on it and finish in time well
> before the next merge window).

Sorry for missing that. It's still in my INBOX in the "to look at"
markings ;-)


Actually, have you seen Tom Zanussi's patch series about histograms.
"tracing: Inter-event (e.g. latency) support". He just recently posted
another series.

I'd like to get that in before looking at adding events into the
critical sections. But that said, I'll still take a look at your patch.

Thanks for reminding me.

-- Steve
Steven Rostedt June 30, 2017, 12:51 p.m. UTC | #3
On Tue, 11 Apr 2017 22:38:51 -0700
Joel Fernandes <joelaf@google.com> wrote:

> diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
> new file mode 100644
> index 000000000000..bfd58dd4f48f
> --- /dev/null
> +++ b/include/trace/events/critical.h
> @@ -0,0 +1,55 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM critical
> +

There should be a:

 #ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS

around the entire header.

> +#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CRITICAL_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +#include <linux/string.h>
> +
> +TRACE_EVENT(critical_start,
> +
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> +	TP_ARGS(ip, parent_ip),
> +
> +	TP_STRUCT__entry(
> +		__array(char, func, 16)
> +		__array(char, parent, 16)

Save only the ip, not the name.

> +	),
> +
> +	TP_fast_assign(
> +		snprintf(__entry->func, 16, "%pf", (void *)ip);
> +		snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

This is very slow to have in the fast path.

> +	),
> +
> +	TP_printk("caller=%s parent=%s\n",

Put in %pF here, trace-cmd and perf know how to handle those, and then
the translation from ip to function name is done in the slow path.


> +		  __entry->func,
> +		  __entry->parent)
> +);
> +
> +TRACE_EVENT(critical_end,
> +
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> +	TP_ARGS(ip, parent_ip),
> +
> +	TP_STRUCT__entry(
> +		__array(char, func, 16)
> +		__array(char, parent, 16)
> +	),
> +
> +	TP_fast_assign(
> +		snprintf(__entry->func, 16, "%pf", (void *)ip);
> +		snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

Same here.

> +	),
> +
> +	TP_printk("caller=%s parent=%s\n",
> +		  __entry->func,
> +		  __entry->parent)
> +);
> +#endif /* _TRACE_CRITICAL_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d5038005eb5d..986a07985360 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -208,6 +208,12 @@ config PREEMPT_TRACER
>  	  enabled. This option and the irqs-off timing option can be
>  	  used together or separately.)
>  
> +config TRACE_CRITICAL_SECTION_EVENTS
> +	bool "Trace critical sections as trace events"
> +	default n
> +	select IRQSOFF_TRACER
> +	select PREEMPT_TRACER

Either make TRACE_CRITICAL_SECTION_EVENTS depend on irqsoff and preempt
tracers, or change trace_irqsoff.c to be able to handle it without
those tracers enabled.

"select" should be avoided when possible.

> +
>  config SCHED_TRACER
>  	bool "Scheduling Latency Tracer"
>  	select GENERIC_TRACER
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 7758bc0617cb..a14ce6185885 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -13,6 +13,8 @@
>  #include <linux/uaccess.h>
>  #include <linux/module.h>
>  #include <linux/ftrace.h>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/critical.h>
>  
>  #include "trace.h"
>  
> @@ -20,6 +22,7 @@ static struct trace_array		*irqsoff_trace __read_mostly;
>  static int				tracer_enabled __read_mostly;
>  
>  static DEFINE_PER_CPU(int, tracing_cpu);
> +static DEFINE_PER_CPU(int, tracing_events_cpu);
>  
>  static DEFINE_RAW_SPINLOCK(max_trace_lock);
>  
> @@ -362,6 +365,44 @@ check_critical_timing(struct trace_array *tr,
>  	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
>  }
>  
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +/*
> + * Called when either preempt or irq are turned off
> + */
> +static inline void
> +start_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> +	int cpu;
> +
> +	cpu = raw_smp_processor_id();

Nuke the above and ...
> +
> +	if (per_cpu(tracing_events_cpu, cpu))

use this_cpu_read() instead. It's much faster.

> +		return;
> +
> +	trace_critical_start(ip, parent_ip);
> +
> +	per_cpu(tracing_events_cpu, cpu) = 1;

	this_cpu_write(tracing_events_cpu, 1);

> +}
> +
> +/*
> + * Called when both preempt and irq are turned back on
> + */
> +static inline void
> +stop_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> +	int cpu;
> +
> +	cpu = raw_smp_processor_id();

Same here.

> +
> +	if (unlikely(per_cpu(tracing_events_cpu, cpu)))
> +		per_cpu(tracing_events_cpu, cpu) = 0;
> +	else
> +		return;
> +
> +	trace_critical_end(ip, parent_ip);

Are you not worried about recursion here? There's no protection.
Wouldn't it be better to have:

	if (!this_cpu_read(tracing_events_cpu))
		return;

	trace_critical_end(ip, parent_ip);

	this_cpu_write(tracing_events_cpu, 0);

?

> +}

Add:

#else

static inline void start_critical_event(unsigned long ip,
					unsigned long pip) { }
static inline void stop_critical_event(unsigned long ip,
					unsigned long pip) { }

static inline int trace_critical_start_enabled() { return 0; }
static inline int trace_critical_end_enabled() { return 0; }

> +#endif
> +
>  static inline void
>  start_critical_timing(unsigned long ip, unsigned long parent_ip)
>  {
> @@ -434,6 +475,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
>  /* start and stop critical timings used to for stoppage (in idle) */
>  void start_critical_timings(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (irqs_disabled() || preempt_count())
> +		start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

And replace the above with:

	if (trace_critical_start_enabled())
		start_critical_event(CALLER_ADDR0, CALLER_ADDR1);

to get rid of the #ifdef embedded in the code.

Also, notice the use of "trace_critical_start_enabled()".

>  	if (preempt_trace() || irq_trace())
>  		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -441,6 +486,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
>  
>  void stop_critical_timings(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (irqs_disabled() || preempt_count())
> +		stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

Same here, but for the stop functions.

>  	if (preempt_trace() || irq_trace())
>  		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -483,6 +532,10 @@ inline void print_irqtrace_events(struct task_struct *curr)
>   */
>  void trace_hardirqs_on(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!preempt_count())
> +		stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

>  	if (!preempt_trace() && irq_trace())
>  		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -490,6 +543,10 @@ EXPORT_SYMBOL(trace_hardirqs_on);
>  
>  void trace_hardirqs_off(void)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!preempt_count())
> +		start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

>  	if (!preempt_trace() && irq_trace())
>  		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
>  }
> @@ -497,6 +554,10 @@ EXPORT_SYMBOL(trace_hardirqs_off);
>  
>  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!preempt_count())
> +		stop_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

>  	if (!preempt_trace() && irq_trace())
>  		stop_critical_timing(CALLER_ADDR0, caller_addr);
>  }
> @@ -504,6 +565,10 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);
>  
>  __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!preempt_count())
> +		start_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

>  	if (!preempt_trace() && irq_trace())
>  		start_critical_timing(CALLER_ADDR0, caller_addr);
>  }
> @@ -515,12 +580,20 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
>  #ifdef CONFIG_PREEMPT_TRACER
>  void trace_preempt_on(unsigned long a0, unsigned long a1)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!irqs_disabled())
> +		stop_critical_event(a0, a1);
> +#endif

and here.

>  	if (preempt_trace() && !irq_trace())
>  		stop_critical_timing(a0, a1);
>  }
>  
>  void trace_preempt_off(unsigned long a0, unsigned long a1)
>  {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +	if (!irqs_disabled())
> +		start_critical_event(a0, a1);
> +#endif

and here.

(that was repetitive).

-- Steve

>  	if (preempt_trace() && !irq_trace())
>  		start_critical_timing(a0, a1);
>  }
Joel Fernandes June 30, 2017, 9:03 p.m. UTC | #4
Hi Steven,

Thanks a lot for the comments, I agree with all of them and had a
comment about one of them:

On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
[..]
> Are you not worried about recursion here? There's no protection.
> Wouldn't it be better to have:
>
>         if (!this_cpu_read(tracing_events_cpu))
>                 return;
>
>         trace_critical_end(ip, parent_ip);
>
>         this_cpu_write(tracing_events_cpu, 0);
>
> ?
>

I tried to go over some scenarios and I think it shouldn't be a
problem because we start the critical event only when either
interrupts are turned off while preemption is turned on, or preempt is
turned off while interrupts are turned on, and the fact that we call
the tracer while still in the critical section. Let me know if you had
a scenario in mind that can cause problems with this.

Anyway, I will rearrange the code like you suggested just to be extra safe,

Thanks,
Joel
Steven Rostedt June 30, 2017, 9:07 p.m. UTC | #5
On Fri, 30 Jun 2017 14:03:14 -0700
Joel Fernandes <joelaf@google.com> wrote:

> Hi Steven,
> 
> Thanks a lot for the comments, I agree with all of them and had a
> comment about one of them:
> 
> On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> [..]
> > Are you not worried about recursion here? There's no protection.
> > Wouldn't it be better to have:
> >
> >         if (!this_cpu_read(tracing_events_cpu))
> >                 return;
> >
> >         trace_critical_end(ip, parent_ip);
> >
> >         this_cpu_write(tracing_events_cpu, 0);
> >
> > ?
> >  
> 
> I tried to go over some scenarios and I think it shouldn't be a
> problem because we start the critical event only when either
> interrupts are turned off while preemption is turned on, or preempt is
> turned off while interrupts are turned on, and the fact that we call
> the tracer while still in the critical section. Let me know if you had
> a scenario in mind that can cause problems with this.

Then may I ask what is tracing_events_cpu actually protecting?

-- Steve
Joel Fernandes June 30, 2017, 9:24 p.m. UTC | #6
On Fri, Jun 30, 2017 at 2:07 PM, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Fri, 30 Jun 2017 14:03:14 -0700
> Joel Fernandes <joelaf@google.com> wrote:
>
>> Hi Steven,
>>
>> Thanks a lot for the comments, I agree with all of them and had a
>> comment about one of them:
>>
>> On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
>> [..]
>> > Are you not worried about recursion here? There's no protection.
>> > Wouldn't it be better to have:
>> >
>> >         if (!this_cpu_read(tracing_events_cpu))
>> >                 return;
>> >
>> >         trace_critical_end(ip, parent_ip);
>> >
>> >         this_cpu_write(tracing_events_cpu, 0);
>> >
>> > ?
>> >
>>
>> I tried to go over some scenarios and I think it shouldn't be a
>> problem because we start the critical event only when either
>> interrupts are turned off while preemption is turned on, or preempt is
>> turned off while interrupts are turned on, and the fact that we call
>> the tracer while still in the critical section. Let me know if you had
>> a scenario in mind that can cause problems with this.
>
> Then may I ask what is tracing_events_cpu actually protecting?

Yes actually I think its not needed considering the above. When I was
developing the code I wanted to do something similar
start_critical_timing which has a tracing_cpu per-cpu variable, and at
the time wasn't sure if it was needed so I just added it in anyway.

Since the case of this patch is more specific (both preempt and irq
turned back on to mark end of critical section), it may not be needed
and I can drop it. I will think some more about it as well let you
know if I find a scenario that says otherwise.

Thanks so much for making the patch better!!

Regards,
Joel
Joel Fernandes June 30, 2017, 11:21 p.m. UTC | #7
Hi Steven,

On Jun 30, 2017 5:32 AM, "Steven Rostedt" <rostedt@goodmis.org> wrote:
[..]
>> Did you have any comments about this patch? It was sent a while ago
>> and if you can provide me your initial thoughts on it, I would
>> appreciate it. (Sorry to ping you about it during the busy merge
>> window time, but I was thinking if I can get any initial early
>> comments from you then I can get to work on it and finish in time well
>> before the next merge window).

> Sorry for missing that. It's still in my INBOX in the "to look at"
> markings ;-)

Cool, no problem :-)

> Actually, have you seen Tom Zanussi's patch series about histograms.
> "tracing: Inter-event (e.g. latency) support". He just recently posted
> another series.

I was at his talk during ELC. I will take a look at his new patches as
well today. Its quite a big series ;-)

> I'd like to get that in before looking at adding events into the
> critical sections. But that said, I'll still take a look at your patch.

I feel though that the inter event stuff is a bit independent of the
critical events. Other than the fact that the irqsoff tracer can
possibly be rewritten to use inter-events, I didn't see how the
critical events work depends on the inter event stuff, or blocks it as
such. Just checking if there is another reason that you see it can
conflict?

> Thanks for reminding me.

Thanks for making time for the review :-)

Regards,
Joel

Patch
diff mbox series

diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
new file mode 100644
index 000000000000..bfd58dd4f48f
--- /dev/null
+++ b/include/trace/events/critical.h
@@ -0,0 +1,55 @@ 
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM critical
+
+#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CRITICAL_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <linux/string.h>
+
+TRACE_EVENT(critical_start,
+
+	TP_PROTO(unsigned long ip, unsigned long parent_ip),
+
+	TP_ARGS(ip, parent_ip),
+
+	TP_STRUCT__entry(
+		__array(char, func, 16)
+		__array(char, parent, 16)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->func, 16, "%pf", (void *)ip);
+		snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
+	),
+
+	TP_printk("caller=%s parent=%s\n",
+		  __entry->func,
+		  __entry->parent)
+);
+
+TRACE_EVENT(critical_end,
+
+	TP_PROTO(unsigned long ip, unsigned long parent_ip),
+
+	TP_ARGS(ip, parent_ip),
+
+	TP_STRUCT__entry(
+		__array(char, func, 16)
+		__array(char, parent, 16)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->func, 16, "%pf", (void *)ip);
+		snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
+	),
+
+	TP_printk("caller=%s parent=%s\n",
+		  __entry->func,
+		  __entry->parent)
+);
+#endif /* _TRACE_CRITICAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5038005eb5d..986a07985360 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -208,6 +208,12 @@  config PREEMPT_TRACER
 	  enabled. This option and the irqs-off timing option can be
 	  used together or separately.)
 
+config TRACE_CRITICAL_SECTION_EVENTS
+	bool "Trace critical sections as trace events"
+	default n
+	select IRQSOFF_TRACER
+	select PREEMPT_TRACER
+
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	select GENERIC_TRACER
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7758bc0617cb..a14ce6185885 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -13,6 +13,8 @@ 
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/critical.h>
 
 #include "trace.h"
 
@@ -20,6 +22,7 @@  static struct trace_array		*irqsoff_trace __read_mostly;
 static int				tracer_enabled __read_mostly;
 
 static DEFINE_PER_CPU(int, tracing_cpu);
+static DEFINE_PER_CPU(int, tracing_events_cpu);
 
 static DEFINE_RAW_SPINLOCK(max_trace_lock);
 
@@ -362,6 +365,44 @@  check_critical_timing(struct trace_array *tr,
 	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+/*
+ * Called when either preempt or irq are turned off
+ */
+static inline void
+start_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu;
+
+	cpu = raw_smp_processor_id();
+
+	if (per_cpu(tracing_events_cpu, cpu))
+		return;
+
+	trace_critical_start(ip, parent_ip);
+
+	per_cpu(tracing_events_cpu, cpu) = 1;
+}
+
+/*
+ * Called when both preempt and irq are turned back on
+ */
+static inline void
+stop_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+	int cpu;
+
+	cpu = raw_smp_processor_id();
+
+	if (unlikely(per_cpu(tracing_events_cpu, cpu)))
+		per_cpu(tracing_events_cpu, cpu) = 0;
+	else
+		return;
+
+	trace_critical_end(ip, parent_ip);
+}
+#endif
+
 static inline void
 start_critical_timing(unsigned long ip, unsigned long parent_ip)
 {
@@ -434,6 +475,10 @@  stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 /* start and stop critical timings used to for stoppage (in idle) */
 void start_critical_timings(void)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (irqs_disabled() || preempt_count())
+		start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
 	if (preempt_trace() || irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
@@ -441,6 +486,10 @@  EXPORT_SYMBOL_GPL(start_critical_timings);
 
 void stop_critical_timings(void)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (irqs_disabled() || preempt_count())
+		stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
 	if (preempt_trace() || irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
@@ -483,6 +532,10 @@  inline void print_irqtrace_events(struct task_struct *curr)
  */
 void trace_hardirqs_on(void)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!preempt_count())
+		stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
 	if (!preempt_trace() && irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
@@ -490,6 +543,10 @@  EXPORT_SYMBOL(trace_hardirqs_on);
 
 void trace_hardirqs_off(void)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!preempt_count())
+		start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
 	if (!preempt_trace() && irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
@@ -497,6 +554,10 @@  EXPORT_SYMBOL(trace_hardirqs_off);
 
 __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!preempt_count())
+		stop_critical_event(CALLER_ADDR0, caller_addr);
+#endif
 	if (!preempt_trace() && irq_trace())
 		stop_critical_timing(CALLER_ADDR0, caller_addr);
 }
@@ -504,6 +565,10 @@  EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
 __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!preempt_count())
+		start_critical_event(CALLER_ADDR0, caller_addr);
+#endif
 	if (!preempt_trace() && irq_trace())
 		start_critical_timing(CALLER_ADDR0, caller_addr);
 }
@@ -515,12 +580,20 @@  EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #ifdef CONFIG_PREEMPT_TRACER
 void trace_preempt_on(unsigned long a0, unsigned long a1)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!irqs_disabled())
+		stop_critical_event(a0, a1);
+#endif
 	if (preempt_trace() && !irq_trace())
 		stop_critical_timing(a0, a1);
 }
 
 void trace_preempt_off(unsigned long a0, unsigned long a1)
 {
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+	if (!irqs_disabled())
+		start_critical_event(a0, a1);
+#endif
 	if (preempt_trace() && !irq_trace())
 		start_critical_timing(a0, a1);
 }