From: "tip-bot2 for Valentin Schneider" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: Abhijeet Dharmapurikar <adharmap@quicinc.com>,
Valentin Schneider <valentin.schneider@arm.com>,
"Peter Zijlstra (Intel)" <peterz@infradead.org>,
"Steven Rostedt (Google)" <rostedt@goodmis.org>,
x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [tip: sched/core] sched/tracing: Don't re-read p->state when emitting sched_switch event
Date: Tue, 01 Mar 2022 15:24:39 -0000 [thread overview]
Message-ID: <164614827941.16921.4995078681021904041.tip-bot2@tip-bot2> (raw)
In-Reply-To: <20220120162520.570782-2-valentin.schneider@arm.com>
The following commit has been merged into the sched/core branch of tip:
Commit-ID: fa2c3254d7cfff5f7a916ab928a562d1165f17bb
Gitweb: https://git.kernel.org/tip/fa2c3254d7cfff5f7a916ab928a562d1165f17bb
Author: Valentin Schneider <valentin.schneider@arm.com>
AuthorDate: Thu, 20 Jan 2022 16:25:19
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 01 Mar 2022 16:18:39 +01:00
sched/tracing: Don't re-read p->state when emitting sched_switch event
As of commit
c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
the following sequence becomes possible:
p->__state = TASK_INTERRUPTIBLE;
__schedule()
deactivate_task(p);
ttwu()
READ !p->on_rq
p->__state=TASK_WAKING
trace_sched_switch()
__trace_sched_switch_state()
task_state_index()
return 0;
TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in
the trace event.
Prevent this by pushing the value read from __schedule() down the trace
event.
Reported-by: Abhijeet Dharmapurikar <adharmap@quicinc.com>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com
---
include/linux/sched.h | 11 ++++++++---
include/trace/events/sched.h | 11 +++++++----
kernel/sched/core.c | 4 ++--
kernel/trace/fgraph.c | 4 +++-
kernel/trace/ftrace.c | 4 +++-
kernel/trace/trace_events.c | 8 ++++++--
kernel/trace/trace_osnoise.c | 4 +++-
kernel/trace/trace_sched_switch.c | 1 +
kernel/trace/trace_sched_wakeup.c | 1 +
9 files changed, 34 insertions(+), 14 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f00132e..457c8a0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1620,10 +1620,10 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
-static inline unsigned int task_state_index(struct task_struct *tsk)
+static inline unsigned int __task_state_index(unsigned int tsk_state,
+ unsigned int tsk_exit_state)
{
- unsigned int tsk_state = READ_ONCE(tsk->__state);
- unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
+ unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
@@ -1633,6 +1633,11 @@ static inline unsigned int task_state_index(struct task_struct *tsk)
return fls(state);
}
+static inline unsigned int task_state_index(struct task_struct *tsk)
+{
+ return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
+}
+
static inline char task_index_to_char(unsigned int state)
{
static const char state_char[] = "RSDTtXZPI";
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9464048..65e7867 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p)
{
unsigned int state;
@@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
* it for left shift operation to get the correct task->state
* mapping.
*/
- state = task_state_index(p);
+ state = __task_state_index(prev_state, p->exit_state);
return state ? (1 << (state - 1)) : state;
}
@@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
+ unsigned int prev_state,
struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(preempt, prev, next),
+ TP_ARGS(preempt, prev_state, prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
@@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
- __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ef94612..3aafc15 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4836,7 +4836,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
{
struct rq *rq = this_rq();
struct mm_struct *mm = rq->prev_mm;
- long prev_state;
+ unsigned int prev_state;
/*
* The previous task will have left us with a preempt_count of 2
@@ -6300,7 +6300,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 22061d3..19028e0 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -415,7 +415,9 @@ free:
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
unsigned long long timestamp;
int index;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f9feb19..6762ae0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7347,7 +7347,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3147614..2a19ea7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -759,7 +759,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -783,7 +785,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 870a08d..1829b4c 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1167,7 +1167,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
* used to record the beginning and to report the end of a thread noise window.
*/
static void
-trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
+trace_sched_switch_callback(void *data, bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p,
struct task_struct *n)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e304196..993b0ed 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -22,6 +22,7 @@ static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
int flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 2402de5..46429f9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -426,6 +426,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;
next prev parent reply other threads:[~2022-03-01 15:24 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-20 16:25 [PATCH v3 0/2] sched/tracing: sched_switch prev_state reported as TASK_RUNNING when it's not Valentin Schneider
2022-01-20 16:25 ` [PATCH v3 1/2] sched/tracing: Don't re-read p->state when emitting sched_switch event Valentin Schneider
2022-03-01 15:24 ` tip-bot2 for Valentin Schneider [this message]
2022-03-04 16:13 ` [tip: sched/core] " Valentin Schneider
2022-03-08 18:02 ` Qais Yousef
2022-03-08 18:10 ` Greg KH
2022-03-08 18:51 ` Qais Yousef
2022-04-09 23:38 ` Qais Yousef
2022-04-10 22:06 ` Qais Yousef
2022-04-10 23:22 ` Holger Hoffstätte
2022-04-11 7:18 ` Holger Hoffstätte
2022-04-11 7:28 ` Greg KH
2022-04-11 8:05 ` Holger Hoffstätte
2022-04-11 13:23 ` Greg KH
2022-04-11 13:22 ` Greg KH
2022-04-11 21:06 ` Qais Yousef
2022-01-20 16:25 ` [PATCH v3 2/2] sched/tracing: Report TASK_RTLOCK_WAIT tasks as TASK_UNINTERRUPTIBLE Valentin Schneider
2022-03-01 15:24 ` [tip: sched/core] " tip-bot2 for Valentin Schneider
2022-04-09 23:42 ` [PATCH v3 2/2] " Qais Yousef
2022-04-10 6:14 ` Greg KH
2022-04-10 22:13 ` Qais Yousef
2022-04-11 13:20 ` Greg KH
2022-04-11 20:18 ` Qais Yousef
2022-01-21 17:15 ` [PATCH v3 0/2] sched/tracing: sched_switch prev_state reported as TASK_RUNNING when it's not Steven Rostedt
2022-02-27 15:33 ` Peter Zijlstra
2022-04-21 22:12 ` [PATCH] sched/tracing: append prev_state to tp args instead Delyan Kratunov
2022-04-22 10:13 ` Valentin Schneider
2022-04-22 11:09 ` Peter Zijlstra
2022-04-22 15:55 ` Steven Rostedt
2022-04-22 16:54 ` Andrii Nakryiko
2022-04-22 16:37 ` Andrii Nakryiko
2022-04-22 17:22 ` Delyan Kratunov
2022-04-22 18:30 ` Alexei Starovoitov
2022-04-26 12:28 ` Peter Zijlstra
2022-04-26 14:09 ` Qais Yousef
2022-04-26 15:54 ` Andrii Nakryiko
2022-04-27 10:34 ` Qais Yousef
2022-04-27 18:17 ` Andrii Nakryiko
2022-04-27 20:32 ` Alexei Starovoitov
2022-04-28 10:02 ` Qais Yousef
2022-05-09 19:32 ` Andrii Nakryiko
2022-05-10 7:01 ` Peter Zijlstra
2022-05-10 8:29 ` Peter Zijlstra
2022-05-10 14:31 ` Steven Rostedt
2022-05-11 18:28 ` [PATCH v2] " Delyan Kratunov
2022-05-11 19:10 ` Steven Rostedt
2022-05-11 22:45 ` [tip: sched/urgent] sched/tracing: Append " tip-bot2 for Delyan Kratunov
2022-05-11 23:40 ` [PATCH v2] sched/tracing: append " Thomas Gleixner
2022-04-26 15:51 ` [PATCH] " Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=164614827941.16921.4995078681021904041.tip-bot2@tip-bot2 \
--to=tip-bot2@linutronix.de \
--cc=adharmap@quicinc.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=valentin.schneider@arm.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).