* [RFC PATCH v3 1/2] tracing: add TRACE_EVENT_MAP_COND
2017-01-13 17:19 [RFC PATCH v3 0/2] Extend scheduling tracepoints Julien Desfossez
@ 2017-01-13 17:19 ` Julien Desfossez
2017-01-13 17:19 ` [RFC PATCH v3 2/2] tracing: add policy-based sched_switch events Julien Desfossez
1 sibling, 0 replies; 4+ messages in thread
From: Julien Desfossez @ 2017-01-13 17:19 UTC (permalink / raw)
To: peterz, rostedt, tglx, mingo, bristot, mathieu.desnoyers
Cc: linux-kernel, Julien Desfossez
This new macro allows hooking conditional tracepoint probes to
pre-existing trace events. It makes it possible to create specialized
versions of the same tracepoint without having to explicitly call every
possible tracepoint in the instrumented code.
In order to use it, a TRACE_EVENT must already exist; after that, we can
connect as many TRACE_EVENT_MAP_COND instances to this TRACE_EVENT as needed.
Example usage:
TRACE_EVENT(tp_test,
TP_PROTO(proto),
TP_ARGS(args),
TP_STRUCT__entry(), /* can be empty */
TP_fast_assign(), /* can be empty */
TP_printk() /* can be empty */
);
TRACE_EVENT_MAP_COND(tp_test, cond_test,
TP_PROTO(proto),
TP_ARGS(args),
TP_CONDITION(cond),
TP_STRUCT__entry(entry),
TP_fast_assign(assign),
TP_printk(print)
);
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Julien Desfossez <jdesfossez@efficios.com>
---
include/linux/trace_events.h | 14 ++++-
include/linux/tracepoint.h | 6 ++
include/trace/define_trace.h | 6 ++
include/trace/perf.h | 24 ++++++--
include/trace/trace_events.h | 130 +++++++++++++++++++++++++++++++++++++------
kernel/trace/trace_events.c | 15 +++--
6 files changed, 168 insertions(+), 27 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index be00761..1f7e0ec 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -217,6 +217,7 @@ enum {
TRACE_EVENT_FL_TRACEPOINT_BIT,
TRACE_EVENT_FL_KPROBE_BIT,
TRACE_EVENT_FL_UPROBE_BIT,
+ TRACE_EVENT_FL_MAP_BIT,
};
/*
@@ -231,6 +232,7 @@ enum {
* TRACEPOINT - Event is a tracepoint
* KPROBE - Event is a kprobe
* UPROBE - Event is a uprobe
+ * MAP - Event maps to a tracepoint as an alias
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -241,10 +243,16 @@ enum {
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
+ TRACE_EVENT_FL_MAP = (1 << TRACE_EVENT_FL_MAP_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
+struct trace_event_map {
+ struct tracepoint *tp;
+ char *name;
+};
+
struct trace_event_call {
struct list_head list;
struct trace_event_class *class;
@@ -252,6 +260,8 @@ struct trace_event_call {
char *name;
/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
struct tracepoint *tp;
+ /* Set TRACE_EVENT_FL_MAP flag when using "map" instead */
+ struct trace_event_map *map;
};
struct trace_event event;
char *print_fmt;
@@ -282,7 +292,9 @@ struct trace_event_call {
static inline const char *
trace_event_name(struct trace_event_call *call)
{
- if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
+ if (call->flags & TRACE_EVENT_FL_MAP)
+ return call->map->name;
+ else if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
return call->tp ? call->tp->name : NULL;
else
return call->name;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index f72fcfe..3e5b5a4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -276,6 +276,7 @@ static inline void tracepoint_synchronize_unregister(void)
#define DEFINE_TRACE_FN(name, reg, unreg)
#define DEFINE_TRACE(name)
+#define DEFINE_TRACE_MAP_COND(name, map, cond)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
@@ -469,6 +470,8 @@ static inline void tracepoint_synchronize_unregister(void)
*/
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DECLARE_EVENT_COND_CLASS(name, proto, args, cond, \
+ tstruct, assign, print)
#define DEFINE_EVENT(template, name, proto, args) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\
@@ -498,4 +501,7 @@ static inline void tracepoint_synchronize_unregister(void)
#define TRACE_EVENT_PERF_PERM(event, expr...)
+#define TRACE_EVENT_MAP_COND(name, map, proto, args, cond, \
+ struct, assign, print)
+
#endif /* ifdef TRACE_EVENT (see note above) */
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 6e3945f..4e112f2 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -45,6 +45,10 @@
assign, print, reg, unreg) \
DEFINE_TRACE_FN(name, reg, unreg)
+#undef TRACE_EVENT_MAP_COND
+#define TRACE_EVENT_MAP_COND(name, map, proto, args, cond, tstruct, \
+ assign, print)
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args) \
DEFINE_TRACE(name)
@@ -100,7 +104,9 @@
#undef TRACE_EVENT_FN
#undef TRACE_EVENT_FN_COND
#undef TRACE_EVENT_CONDITION
+#undef TRACE_EVENT_MAP_COND
#undef DECLARE_EVENT_CLASS
+#undef DECLARE_EVENT_COND_CLASS
#undef DEFINE_EVENT
#undef DEFINE_EVENT_FN
#undef DEFINE_EVENT_PRINT
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 04fe68bb..dbd3d27 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -26,8 +26,9 @@
#undef __perf_task
#define __perf_task(t) (__task = (t))
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
static notrace void \
perf_trace_##call(void *__data, proto) \
{ \
@@ -43,6 +44,9 @@
int __data_size; \
int rctx; \
\
+ if (!(cond)) \
+ return; \
+ \
__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
\
head = this_cpu_ptr(event_call->perf_events); \
@@ -69,18 +73,28 @@
head, __task); \
}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), \
+ 1, PARAMS(tstruct), PARAMS(assign), \
+ PARAMS(print))
+
/*
* This part is compiled out, it is only here as a build time check
* to make sure that if the tracepoint handling changes, the
* perf probe will fail to compile unless it too is updated.
*/
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
-static inline void perf_test_probe_##call(void) \
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, call, map, proto, args, cond) \
+static inline void perf_test_probe_##map(void) \
{ \
check_trace_callback_type_##call(perf_trace_##template); \
}
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args) \
+ DEFINE_EVENT_MAP_COND(template, call, call, PARAMS(proto), \
+ PARAMS(args), 1)
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 467e12f..953cea3 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -65,6 +65,18 @@
PARAMS(print)); \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+#undef TRACE_EVENT_MAP_COND
+#define TRACE_EVENT_MAP_COND(name, map, proto, args, cond, \
+ tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(map, \
+ PARAMS(proto), \
+ PARAMS(args), \
+ PARAMS(cond), \
+ PARAMS(tstruct), \
+ PARAMS(assign), \
+ PARAMS(print)); \
+ DEFINE_EVENT_MAP_COND(map, name, map, PARAMS(proto), \
+ PARAMS(args), cond);
#undef __field
#define __field(type, item) type item;
@@ -93,8 +105,9 @@
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(name, proto, args, cond, tstruct, \
+ assign, print) \
struct trace_event_raw_##name { \
struct trace_entry ent; \
tstruct \
@@ -103,6 +116,11 @@
\
static struct trace_event_class event_class_##name;
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(name, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args) \
static struct trace_event_call __used \
@@ -116,6 +134,9 @@
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, name, map, proto, args, cond)
+
/* Callbacks are meaningless to ftrace. */
#undef TRACE_EVENT_FN
#define TRACE_EVENT_FN(name, proto, args, tstruct, \
@@ -182,12 +203,18 @@
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
struct trace_event_data_offsets_##call { \
tstruct; \
};
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args)
@@ -195,6 +222,9 @@
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, name, map, proto, args, cond)
+
#undef TRACE_EVENT_FLAGS
#define TRACE_EVENT_FLAGS(event, flag)
@@ -307,8 +337,9 @@
trace_print_array_seq(p, array, count, el_size); \
})
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
static notrace enum print_line_t \
trace_raw_output_##call(struct trace_iterator *iter, int flags, \
struct trace_event *trace_event) \
@@ -332,6 +363,11 @@
.trace = trace_raw_output_##call, \
};
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \
static notrace enum print_line_t \
@@ -410,8 +446,9 @@
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ func, print) \
static int notrace __init \
trace_event_define_fields_##call(struct trace_event_call *event_call) \
{ \
@@ -423,6 +460,11 @@
return ret; \
}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(func), PARAMS(print))
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args)
@@ -430,6 +472,9 @@
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, name, map, proto, args, cond)
+
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
@@ -489,8 +534,9 @@
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \
__bitmask_size_in_longs(nr_bits))
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
static inline notrace int trace_event_get_offsets_##call( \
struct trace_event_data_offsets_##call *__data_offsets, proto) \
{ \
@@ -503,6 +549,11 @@
return __data_size; \
}
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, name, proto, args)
@@ -510,6 +561,9 @@
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, name, map, proto, args, cond)
+
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
@@ -658,8 +712,9 @@
#undef __perf_task
#define __perf_task(t) (t)
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
\
static notrace void \
trace_event_raw_event_##call(void *__data, proto) \
@@ -670,6 +725,9 @@
struct trace_event_raw_##call *entry; \
int __data_size; \
\
+ if (!(cond)) \
+ return; \
+ \
if (trace_trigger_soft_disabled(trace_file)) \
return; \
\
@@ -687,19 +745,30 @@
\
trace_event_buffer_commit(&fbuffer); \
}
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
/*
* The ftrace_test_probe is compiled out, it is only here as a build time check
* to make sure that if the tracepoint handling changes, the ftrace probe will
* fail to compile unless it too is updated.
*/
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
-static inline void ftrace_test_probe_##call(void) \
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(template, call, map, proto, args, cond) \
+ static inline void ftrace_test_probe_##map(void) \
{ \
check_trace_callback_type_##call(trace_event_raw_event_##template); \
}
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args) \
+ DEFINE_EVENT_MAP_COND(template, call, call, PARAMS(proto), \
+ PARAMS(args), 1)
+
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print)
@@ -720,8 +789,9 @@
#undef TP_printk
#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+#undef DECLARE_EVENT_COND_CLASS
+#define DECLARE_EVENT_COND_CLASS(call, proto, args, cond, tstruct, \
+ assign, print) \
_TRACE_PERF_PROTO(call, PARAMS(proto)); \
static char print_fmt_##call[] = print; \
static struct trace_event_class __used __refdata event_class_##call = { \
@@ -734,6 +804,11 @@
_TRACE_PERF_INIT(call) \
};
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_COND_CLASS(call, PARAMS(proto), PARAMS(args), 1, \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print))
+
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
\
@@ -766,4 +841,25 @@
static struct trace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
+#undef DEFINE_EVENT_MAP_COND
+#define DEFINE_EVENT_MAP_COND(_template, _call, _map, _proto, _args, cond) \
+ \
+static struct trace_event_map event_map_##_map = { \
+ .tp = &__tracepoint_##_call, \
+ .name = #_map, \
+}; \
+ \
+static struct trace_event_call __used event_##_map = { \
+ .class = &event_class_##_template, \
+ { \
+ .map = &event_map_##_map, \
+ }, \
+ .event.funcs = &trace_event_type_funcs_##_template, \
+ .print_fmt = print_fmt_##_template, \
+ .flags = TRACE_EVENT_FL_TRACEPOINT | TRACE_EVENT_FL_MAP, \
+}; \
+static struct trace_event_call __used \
+__attribute__((section("_ftrace_events"))) *__event_##_map = &event_##_map
+
+
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9311654..b4ea2bf 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -287,26 +287,33 @@ int trace_event_reg(struct trace_event_call *call,
enum trace_reg type, void *data)
{
struct trace_event_file *file = data;
+ struct tracepoint *tp;
WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
+
+ if (call->flags & TRACE_EVENT_FL_MAP)
+ tp = call->map->tp;
+ else
+ tp = call->tp;
+
switch (type) {
case TRACE_REG_REGISTER:
- return tracepoint_probe_register(call->tp,
+ return tracepoint_probe_register(tp,
call->class->probe,
file);
case TRACE_REG_UNREGISTER:
- tracepoint_probe_unregister(call->tp,
+ tracepoint_probe_unregister(tp,
call->class->probe,
file);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return tracepoint_probe_register(call->tp,
+ return tracepoint_probe_register(tp,
call->class->perf_probe,
call);
case TRACE_REG_PERF_UNREGISTER:
- tracepoint_probe_unregister(call->tp,
+ tracepoint_probe_unregister(tp,
call->class->perf_probe,
call);
return 0;
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RFC PATCH v3 2/2] tracing: add policy-based sched_switch events
2017-01-13 17:19 [RFC PATCH v3 0/2] Extend scheduling tracepoints Julien Desfossez
2017-01-13 17:19 ` [RFC PATCH v3 1/2] tracing: add TRACE_EVENT_MAP_COND Julien Desfossez
@ 2017-01-13 17:19 ` Julien Desfossez
2017-02-07 12:23 ` Peter Zijlstra
1 sibling, 1 reply; 4+ messages in thread
From: Julien Desfossez @ 2017-01-13 17:19 UTC (permalink / raw)
To: peterz, rostedt, tglx, mingo, bristot, mathieu.desnoyers
Cc: linux-kernel, Julien Desfossez
Add 3 new tracepoints: sched_switch_fair, sched_switch_rt and
sched_switch_dl.
These conditional tracepoints are emitted based on the scheduling class
of the next task. Each of these tracepoints gets rid of the prio field
from the original sched_switch and replaces it with fields that are
relevant to the policy of the next task:
- for a fair task: the nice value,
- for a rt task: the nice and rt_priority values,
- for a dl task: the runtime, deadline and period values.
The original sched_switch event is left unmodified, so these new events
can be enabled at the same time (but they are emitted consecutively so
we can see a timestamp offset).
Example output from the 3 new events:
sched_switch_fair: prev_comm=cat prev_pid=2179 prev_state=R+ ==> next_comm=b
next_pid=874 next_policy=SCHED_NORMAL next_nice=0
sched_switch_rt: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b
next_pid=2215 next_policy=SCHED_FIFO next_nice=0
next_rt_priority=100
sched_switch_dl: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b
next_pid=2215 next_policy=SCHED_DEADLINE
next_dl_runtime=10000000 next_dl_deadline=30000000
next_dl_period=30000000
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Julien Desfossez <jdesfossez@efficios.com>
---
include/trace/events/sched.h | 192 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 192 insertions(+)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57..c506ed1 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -5,9 +5,39 @@
#define _TRACE_SCHED_H
#include <linux/sched.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>
+#define SCHEDULING_POLICY \
+ EM( SCHED_NORMAL, "SCHED_NORMAL") \
+ EM( SCHED_FIFO, "SCHED_FIFO") \
+ EM( SCHED_RR, "SCHED_RR") \
+ EM( SCHED_BATCH, "SCHED_BATCH") \
+ EM( SCHED_IDLE, "SCHED_IDLE") \
+ EMe(SCHED_DEADLINE, "SCHED_DEADLINE")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+SCHEDULING_POLICY
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) {a, b},
+#define EMe(a, b) {a, b}
+
/*
* Tracepoint for calling kthread_stop, performed to end a kthread:
*/
@@ -162,6 +192,168 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
);
/*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a fair scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_fair,
+
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next),
+
+ TP_ARGS(preempt, prev, next),
+
+ TP_CONDITION(!dl_prio(next->prio) && !rt_prio(next->prio)),
+
+ TP_STRUCT__entry(
+ __array( char, prev_comm, TASK_COMM_LEN )
+ __field( pid_t, prev_pid )
+ __field( long, prev_state )
+ __array( char, next_comm, TASK_COMM_LEN )
+ __field( pid_t, next_pid )
+ __field( unsigned int, next_policy )
+ __field( int, next_nice )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+ __entry->prev_pid = prev->pid;
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ __entry->next_policy = next->policy;
+ __entry->next_nice = task_nice(next);
+ ),
+
+ TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+ "next_pid=%d next_policy=%s next_nice=%d",
+ __entry->prev_comm, __entry->prev_pid,
+ __entry->prev_state & (TASK_STATE_MAX-1) ?
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+ { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+ { 16, "Z" }, { 32, "X" }, { 64, "x" },
+ { 128, "K" }, { 256, "W" }, { 512, "P" },
+ { 1024, "N" }) : "R",
+ __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+ __entry->next_comm, __entry->next_pid,
+ __print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+ __entry->next_nice)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a rt scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_rt,
+
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next),
+
+ TP_ARGS(preempt, prev, next),
+
+ TP_CONDITION(rt_prio(next->prio)),
+
+ TP_STRUCT__entry(
+ __array( char, prev_comm, TASK_COMM_LEN )
+ __field( pid_t, prev_pid )
+ __field( long, prev_state )
+ __array( char, next_comm, TASK_COMM_LEN )
+ __field( pid_t, next_pid )
+ __field( unsigned int, next_policy )
+ __field( int, next_nice )
+ __field( unsigned int, next_rt_priority )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+ __entry->prev_pid = prev->pid;
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ /*
+ * With PI, a real RT policy might not be set and the default
+ * RT policy is SCHED_FIFO.
+ */
+ __entry->next_policy = (next->policy == SCHED_RR) ?
+ SCHED_RR : SCHED_FIFO;
+ __entry->next_nice = task_nice(next);
+ __entry->next_rt_priority = MAX_RT_PRIO - 1 - next->prio;
+ ),
+
+ TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+ "next_pid=%d next_policy=%s next_nice=%d "
+ "next_rt_priority=%u",
+ __entry->prev_comm, __entry->prev_pid,
+ __entry->prev_state & (TASK_STATE_MAX-1) ?
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+ { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+ { 16, "Z" }, { 32, "X" }, { 64, "x" },
+ { 128, "K" }, { 256, "W" }, { 512, "P" },
+ { 1024, "N" }) : "R",
+ __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+ __entry->next_comm, __entry->next_pid,
+ __print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+ __entry->next_nice, __entry->next_rt_priority)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a deadline scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_dl,
+
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next),
+
+ TP_ARGS(preempt, prev, next),
+
+ TP_CONDITION(dl_prio(next->prio)),
+
+ TP_STRUCT__entry(
+ __array( char, prev_comm, TASK_COMM_LEN )
+ __field( pid_t, prev_pid )
+ __field( long, prev_state )
+ __array( char, next_comm, TASK_COMM_LEN )
+ __field( pid_t, next_pid )
+ __field( unsigned int, next_policy )
+ __field( u64, next_dl_runtime )
+ __field( u64, next_dl_deadline )
+ __field( u64, next_dl_period )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+ __entry->prev_pid = prev->pid;
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ __entry->next_policy = SCHED_DEADLINE;
+ __entry->next_dl_runtime = next->dl.dl_runtime;
+ __entry->next_dl_deadline = next->dl.dl_deadline;
+ __entry->next_dl_period = next->dl.dl_period;
+ ),
+
+ TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+ "next_pid=%d next_policy=%s next_dl_runtime=%Lu "
+ "next_dl_deadline=%Lu next_dl_period=%Lu",
+ __entry->prev_comm, __entry->prev_pid,
+ __entry->prev_state & (TASK_STATE_MAX-1) ?
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+ { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+ { 16, "Z" }, { 32, "X" }, { 64, "x" },
+ { 128, "K" }, { 256, "W" }, { 512, "P" },
+ { 1024, "N" }) : "R",
+ __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+ __entry->next_comm, __entry->next_pid,
+ __print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+ __entry->next_dl_runtime, __entry->next_dl_deadline,
+ __entry->next_dl_period)
+
+);
+
+/*
* Tracepoint for a task being migrated:
*/
TRACE_EVENT(sched_migrate_task,
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread