From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751371AbdAMR2n (ORCPT ); Fri, 13 Jan 2017 12:28:43 -0500 Received: from albert.ini-tech.com ([192.99.4.57]:55690 "EHLO smtp.ini-tech.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750975AbdAMR2l (ORCPT ); Fri, 13 Jan 2017 12:28:41 -0500 X-Greylist: delayed 513 seconds by postgrey-1.27 at vger.kernel.org; Fri, 13 Jan 2017 12:28:41 EST From: Julien Desfossez To: peterz@infradead.org, rostedt@goodmis.org, tglx@linutronix.de, mingo@redhat.com, bristot@redhat.com, mathieu.desnoyers@efficios.com Cc: linux-kernel@vger.kernel.org, Julien Desfossez Subject: [RFC PATCH v3 2/2] tracing: add policy-based sched_switch events Date: Fri, 13 Jan 2017 12:19:53 -0500 Message-Id: <1484327993-5036-3-git-send-email-jdesfossez@efficios.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1484327993-5036-1-git-send-email-jdesfossez@efficios.com> References: <1484327993-5036-1-git-send-email-jdesfossez@efficios.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add 3 new tracepoints: sched_switch_fair, sched_switch_rt and sched_switch_dl. These conditional tracepoints are emitted based on the scheduling class of the next task. Each of these tracepoints gets rid of the prio field from the original sched_switch and replaces it with fields that are relevant to the policy of the next task: - for a fair task: the nice value, - for an rt task: the nice and rt_priority values, - for a dl task: the runtime, deadline and period values. The original sched_switch event is left unmodified, so these new events can be enabled at the same time as it (but the events are emitted one after the other, so a small timestamp offset between them is visible). 
Example output from the 3 new events: sched_switch_fair: prev_comm=cat prev_pid=2179 prev_state=R+ ==> next_comm=b next_pid=874 next_policy=SCHED_NORMAL next_nice=0 sched_switch_rt: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b next_pid=2215 next_policy=SCHED_FIFO next_nice=0 next_rt_priority=100 sched_switch_dl: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b next_pid=2215 next_policy=SCHED_DEADLINE next_dl_runtime=10000000 next_dl_deadline=30000000 next_dl_period=30000000 Cc: Peter Zijlstra Cc: Steven Rostedt (Red Hat) Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Daniel Bristot de Oliveira Reviewed-by: Mathieu Desnoyers Signed-off-by: Julien Desfossez --- include/trace/events/sched.h | 192 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9b90c57..c506ed1 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -5,9 +5,39 @@ #define _TRACE_SCHED_H #include +#include +#include #include #include +#define SCHEDULING_POLICY \ + EM( SCHED_NORMAL, "SCHED_NORMAL") \ + EM( SCHED_FIFO, "SCHED_FIFO") \ + EM( SCHED_RR, "SCHED_RR") \ + EM( SCHED_BATCH, "SCHED_BATCH") \ + EM( SCHED_IDLE, "SCHED_IDLE") \ + EMe(SCHED_DEADLINE, "SCHED_DEADLINE") + +/* + * First define the enums in the above macros to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +SCHEDULING_POLICY + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. 
+ */ +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ @@ -162,6 +192,168 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * ); /* + * Tracepoint for task switches, performed by the scheduler where the next + * task has a fair scheduling policy. + */ +TRACE_EVENT_MAP_COND(sched_switch, sched_switch_fair, + + TP_PROTO(bool preempt, + struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(preempt, prev, next), + + TP_CONDITION(!dl_prio(next->prio) && !rt_prio(next->prio)), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( long, prev_state ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( unsigned int, next_policy ) + __field( int, next_nice ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_state = __trace_sched_switch_state(preempt, prev); + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_policy = next->policy; + __entry->next_nice = task_nice(next); + ), + + TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s " + "next_pid=%d next_policy=%s next_nice=%d", + __entry->prev_comm, __entry->prev_pid, + __entry->prev_state & (TASK_STATE_MAX-1) ? + __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "K" }, { 256, "W" }, { 512, "P" }, + { 1024, "N" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", + __entry->next_comm, __entry->next_pid, + __print_symbolic(__entry->next_policy, SCHEDULING_POLICY), + __entry->next_nice) +); + +/* + * Tracepoint for task switches, performed by the scheduler where the next + * task has a rt scheduling policy. 
+ */ +TRACE_EVENT_MAP_COND(sched_switch, sched_switch_rt, + + TP_PROTO(bool preempt, + struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(preempt, prev, next), + + TP_CONDITION(rt_prio(next->prio)), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( long, prev_state ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( unsigned int, next_policy ) + __field( int, next_nice ) + __field( unsigned int, next_rt_priority ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_state = __trace_sched_switch_state(preempt, prev); + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + /* + * With PI, a real RT policy might not be set and the default + * RT policy is SCHED_FIFO. + */ + __entry->next_policy = (next->policy == SCHED_RR) ? + SCHED_RR : SCHED_FIFO; + __entry->next_nice = task_nice(next); + __entry->next_rt_priority = MAX_RT_PRIO - 1 - next->prio; + ), + + TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s " + "next_pid=%d next_policy=%s next_nice=%d " + "next_rt_priority=%u", + __entry->prev_comm, __entry->prev_pid, + __entry->prev_state & (TASK_STATE_MAX-1) ? + __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "K" }, { 256, "W" }, { 512, "P" }, + { 1024, "N" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", + __entry->next_comm, __entry->next_pid, + __print_symbolic(__entry->next_policy, SCHEDULING_POLICY), + __entry->next_nice, __entry->next_rt_priority) +); + +/* + * Tracepoint for task switches, performed by the scheduler where the next + * task has a deadline scheduling policy. 
+ */ +TRACE_EVENT_MAP_COND(sched_switch, sched_switch_dl, + + TP_PROTO(bool preempt, + struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(preempt, prev, next), + + TP_CONDITION(dl_prio(next->prio)), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( long, prev_state ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( unsigned int, next_policy ) + __field( u64, next_dl_runtime ) + __field( u64, next_dl_deadline ) + __field( u64, next_dl_period ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_state = __trace_sched_switch_state(preempt, prev); + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_policy = SCHED_DEADLINE; + __entry->next_dl_runtime = next->dl.dl_runtime; + __entry->next_dl_deadline = next->dl.dl_deadline; + __entry->next_dl_period = next->dl.dl_period; + ), + + TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s " + "next_pid=%d next_policy=%s next_dl_runtime=%Lu " + "next_dl_deadline=%Lu next_dl_period=%Lu", + __entry->prev_comm, __entry->prev_pid, + __entry->prev_state & (TASK_STATE_MAX-1) ? + __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "K" }, { 256, "W" }, { 512, "P" }, + { 1024, "N" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", + __entry->next_comm, __entry->next_pid, + __print_symbolic(__entry->next_policy, SCHEDULING_POLICY), + __entry->next_dl_runtime, __entry->next_dl_deadline, + __entry->next_dl_period) + +); + +/* * Tracepoint for a task being migrated: */ TRACE_EVENT(sched_migrate_task, -- 1.9.1