* [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
@ 2023-02-17 14:53 Sebastian Andrzej Siewior
  2023-02-22 13:36 ` Peter Zijlstra
  2023-05-25 16:52 ` Peter Zijlstra
  0 siblings, 2 replies; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-02-17 14:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ben Segall, Daniel Bristot de Oliveira, Dietmar Eggemann,
	Ingo Molnar, Juri Lelli, Mel Gorman, Peter Zijlstra,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

wait_task_inactive() waits for a thread to unschedule in a certain task state.
On PREEMPT_RT that state may be stored in task_struct::saved_state while the
thread being waited for blocks on a sleeping lock and task_struct::__state is
set to TASK_RTLOCK_WAIT.
It is not possible to check only for TASK_RTLOCK_WAIT to be sure that the task
is blocked on a sleeping lock, because during wake-up (after the sleeping lock
has been acquired) the task state is set to TASK_RUNNING. Once the task is on a
CPU and has acquired the pi_lock, it will reset the state accordingly, but
until then TASK_RUNNING will be observed (with the desired state saved in
saved_state).

On PREEMPT_RT, also check task_struct::saved_state if the desired state was
not found in task_struct::__state. If a match is found in saved_state, wait
until the task is idle and the state becomes visible in task_struct::__state.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
---
Repost of https://lore.kernel.org/Yt%2FpQAFQ1xKNK0RY@linutronix.de
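
For context, a simplified sketch of the PREEMPT_RT helpers doing the state
juggling described above. This is illustrative only, not the verbatim kernel
macros; the real definitions live in include/linux/sched.h and carry
additional lockdep/debug checks:

/* Sketch: park the current state and advertise TASK_RTLOCK_WAIT. */
#define current_save_and_set_rtlock_wait_state()		\
	do {							\
		raw_spin_lock(&current->pi_lock);		\
		current->saved_state = current->__state;	\
		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);	\
		raw_spin_unlock(&current->pi_lock);		\
	} while (0)

/* Sketch: restore the parked state once the sleeping lock is acquired. */
#define current_restore_rtlock_saved_state()			\
	do {							\
		raw_spin_lock(&current->pi_lock);		\
		WRITE_ONCE(current->__state, current->saved_state); \
		current->saved_state = TASK_RUNNING;		\
		raw_spin_unlock(&current->pi_lock);		\
	} while (0)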

 kernel/sched/core.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 76 insertions(+), 5 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3266,6 +3266,76 @@ int migrate_swap(struct task_struct *cur
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_PREEMPT_RT
+
+/*
+ * Consider:
+ *
+ *  set_special_state(X);
+ *
+ *  do_things()
+ *    // Somewhere in there is an rtlock that can be contended:
+ *    current_save_and_set_rtlock_wait_state();
+ *    [...]
+ *    schedule_rtlock(); (A)
+ *    [...]
+ *    current_restore_rtlock_saved_state();
+ *
+ *  schedule(); (B)
+ *
+ * If p->saved_state is anything other than TASK_RUNNING, then p blocked on an
+ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its
+ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more
+ * work to do upon acquiring the lock in do_things() before whoever called
+ * wait_task_inactive() should return. IOW, we have to wait for:
+ *
+ *   p.saved_state = TASK_RUNNING
+ *   p.__state     = X
+ *
+ * which implies the task isn't blocked on an RT lock and got to schedule() (B).
+ *
+ * Also see comments in ttwu_state_match().
+ */
+
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+	unsigned long flags;
+	bool mismatch;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	if (READ_ONCE(p->__state) & match_state)
+		mismatch = false;
+	else if (READ_ONCE(p->saved_state) & match_state)
+		mismatch = false;
+	else
+		mismatch = true;
+
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	return mismatch;
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+					bool *wait)
+{
+	if (READ_ONCE(p->__state) & match_state)
+		return true;
+	if (READ_ONCE(p->saved_state) & match_state) {
+		*wait = true;
+		return true;
+	}
+	return false;
+}
+#else
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+	return !(READ_ONCE(p->__state) & match_state);
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+					bool *wait)
+{
+	return (READ_ONCE(p->__state) & match_state);
+}
+#endif
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -3284,7 +3354,7 @@ int migrate_swap(struct task_struct *cur
  */
 unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
-	int running, queued;
+	bool running, wait;
 	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
@@ -3310,7 +3380,7 @@ unsigned long wait_task_inactive(struct
 		 * is actually now running somewhere else!
 		 */
 		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
+			if (state_mismatch(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -3323,9 +3393,10 @@ unsigned long wait_task_inactive(struct
 		rq = task_rq_lock(p, &rf);
 		trace_sched_wait_task(p);
 		running = task_on_cpu(rq, p);
-		queued = task_on_rq_queued(p);
+		wait = task_on_rq_queued(p);
 		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
+
+		if (state_match(p, match_state, &wait))
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 		task_rq_unlock(rq, p, &rf);
 
@@ -3355,7 +3426,7 @@ unsigned long wait_task_inactive(struct
 		 * running right now), it's preempted, and we should
 		 * yield - it could be a while.
 		 */
-		if (unlikely(queued)) {
+		if (unlikely(wait)) {
 			ktime_t to = NSEC_PER_SEC / HZ;
 
 			set_current_state(TASK_UNINTERRUPTIBLE);

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-02-17 14:53 [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive() Sebastian Andrzej Siewior
@ 2023-02-22 13:36 ` Peter Zijlstra
  2023-02-23 16:53   ` Sebastian Andrzej Siewior
  2023-05-25 16:52 ` Peter Zijlstra
  1 sibling, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2023-02-22 13:36 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Fri, Feb 17, 2023 at 03:53:02PM +0100, Sebastian Andrzej Siewior wrote:
> wait_task_inactive() waits for a thread to unschedule in a certain task state.
> On PREEMPT_RT that state may be stored in task_struct::saved_state while the
> thread being waited for blocks on a sleeping lock and task_struct::__state is
> set to TASK_RTLOCK_WAIT.
> It is not possible to check only for TASK_RTLOCK_WAIT to be sure that the task
> is blocked on a sleeping lock, because during wake-up (after the sleeping lock
> has been acquired) the task state is set to TASK_RUNNING. Once the task is on a
> CPU and has acquired the pi_lock, it will reset the state accordingly, but
> until then TASK_RUNNING will be observed (with the desired state saved in
> saved_state).
> 
> On PREEMPT_RT, also check task_struct::saved_state if the desired state was
> not found in task_struct::__state. If a match is found in saved_state, wait
> until the task is idle and the state becomes visible in task_struct::__state.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Reviewed-by: Valentin Schneider <vschneid@redhat.com>
> ---

Which of the very few wait_task_inactive() users requires this?

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-02-22 13:36 ` Peter Zijlstra
@ 2023-02-23 16:53   ` Sebastian Andrzej Siewior
  2023-03-29 13:33     ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-02-23 16:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-02-22 14:36:14 [+0100], Peter Zijlstra wrote:
> Which of the very few wait_task_inactive() users requires this?

ptrace is the remaining (known) one (just verified on v6.2-rt3).
ptrace_check_attach() waits for the child which blocks on tasklist_lock.

tglx argued that wait_task_inactive() should work regardless of whether
the task being waited for blocks on a sleeping lock.
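
For reference, the call site in question looks roughly like this (an
abridged sketch of ptrace_check_attach() from kernel/ptrace.c around v6.2;
comments and error paths are trimmed):

static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
{
	int ret = -ESRCH;

	read_lock(&tasklist_lock);
	if (child->ptrace && child->parent == current) {
		if (ignore_state || ptrace_freeze_traced(child))
			ret = 0;
	}
	read_unlock(&tasklist_lock);

	/*
	 * The tracee sets TASK_TRACED in ptrace_stop() and may then block
	 * on tasklist_lock (an rtlock on PREEMPT_RT), so TASK_TRACED can
	 * be parked in ->saved_state when this runs.
	 */
	if (!ret && !ignore_state &&
	    WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED)))
		ret = -ESRCH;

	return ret;
}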

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-02-23 16:53   ` Sebastian Andrzej Siewior
@ 2023-03-29 13:33     ` Sebastian Andrzej Siewior
  2023-05-24 14:59       ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-03-29 13:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-02-23 17:53:48 [+0100], To Peter Zijlstra wrote:
> On 2023-02-22 14:36:14 [+0100], Peter Zijlstra wrote:
> > Which of the very few wait_task_inactive() users requires this?
> 
> ptrace is the remaining (known) one (just verified on v6.2-rt3).
> ptrace_check_attach() waits for the child which blocks on tasklist_lock.
> 
> tglx argued that wait_task_inactive() should work regardless of whether
> the task being waited for blocks on a sleeping lock.

a polite ping.

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-03-29 13:33     ` Sebastian Andrzej Siewior
@ 2023-05-24 14:59       ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-05-24 14:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-03-29 15:33:39 [+0200], To Peter Zijlstra wrote:
> On 2023-02-23 17:53:48 [+0100], To Peter Zijlstra wrote:
> > On 2023-02-22 14:36:14 [+0100], Peter Zijlstra wrote:
> > > Which of the very few wait_task_inactive() users requires this?
> > 
> > ptrace is the remaining (known) one (just verified on v6.2-rt3).
> > ptrace_check_attach() waits for the child which blocks on tasklist_lock.
> > 
> > tglx argued that wait_task_inactive() should work regardless of whether
> > the task being waited for blocks on a sleeping lock.
> 
> a polite ping.

a very polite ping.

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-02-17 14:53 [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive() Sebastian Andrzej Siewior
  2023-02-22 13:36 ` Peter Zijlstra
@ 2023-05-25 16:52 ` Peter Zijlstra
  2023-05-26  8:05   ` Peter Zijlstra
  2023-05-26  8:47   ` [PATCH] " Sebastian Andrzej Siewior
  1 sibling, 2 replies; 16+ messages in thread
From: Peter Zijlstra @ 2023-05-25 16:52 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Fri, Feb 17, 2023 at 03:53:02PM +0100, Sebastian Andrzej Siewior wrote:

> +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> +{
> +	unsigned long flags;
> +	bool mismatch;
> +
> +	raw_spin_lock_irqsave(&p->pi_lock, flags);
> +	if (READ_ONCE(p->__state) & match_state)
> +		mismatch = false;
> +	else if (READ_ONCE(p->saved_state) & match_state)
> +		mismatch = false;
> +	else
> +		mismatch = true;
> +
> +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +	return mismatch;
> +}
> +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> +					bool *wait)
> +{
> +	if (READ_ONCE(p->__state) & match_state)
> +		return true;
> +	if (READ_ONCE(p->saved_state) & match_state) {
> +		*wait = true;
> +		return true;
> +	}
> +	return false;
> +}
> +#else
> +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> +{
> +	return !(READ_ONCE(p->__state) & match_state);
> +}
> +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> +					bool *wait)
> +{
> +	return (READ_ONCE(p->__state) & match_state);
> +}
> +#endif
> +
>  /*
>   * wait_task_inactive - wait for a thread to unschedule.
>   *

Urgh...

I've ended up with the below.. I've tried folding it with
ttwu_state_match() but every attempt so far makes it an unholy mess.

Now, if only we had proper lock guard then we could drop another few
lines, but alas.
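
For illustration, with the scoped-guard helpers that were merged later via
include/linux/cleanup.h, the locked wrapper could shrink to something like
the hypothetical sketch below (assuming guard(raw_spinlock_irq) is
available):

static __always_inline bool wti_state_match(struct task_struct *p, unsigned int state)
{
#ifdef CONFIG_PREEMPT_RT
	/* Hypothetical: lock auto-released at function exit via cleanup.h. */
	guard(raw_spinlock_irq)(&p->pi_lock);
#endif
	return __wti_state_match(p, state, NULL);
}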

---
 kernel/sched/core.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a68d1276bab0..5a106629a98d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3341,6 +3341,37 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+static __always_inline
+bool __wti_state_match(struct task_struct *p, unsigned int state, int *queued)
+{
+	if (READ_ONCE(p->__state) & state)
+		return true;
+
+#ifdef CONFIG_PREEMPT_RT
+	if (READ_ONCE(p->saved_state) & state) {
+		if (queued)
+			*queued = 1;
+		return true;
+	}
+#endif
+	return false;
+}
+
+static __always_inline bool wti_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+	bool match;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	match = __wti_state_match(p, state, NULL);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+#else
+	return __wti_state_match(p, state, NULL);
+#endif
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -3385,7 +3416,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		 * is actually now running somewhere else!
 		 */
 		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
+			if (!wti_state_match(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -3400,7 +3431,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		running = task_on_cpu(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
+		if (__wti_state_match(p, match_state, &queued))
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 		task_rq_unlock(rq, p, &rf);
 

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-05-25 16:52 ` Peter Zijlstra
@ 2023-05-26  8:05   ` Peter Zijlstra
  2023-05-26 15:13     ` Sebastian Andrzej Siewior
  2023-05-26  8:47   ` [PATCH] " Sebastian Andrzej Siewior
  1 sibling, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2023-05-26  8:05 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Thu, May 25, 2023 at 06:52:44PM +0200, Peter Zijlstra wrote:
> On Fri, Feb 17, 2023 at 03:53:02PM +0100, Sebastian Andrzej Siewior wrote:
> 
> > +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> > +{
> > +	unsigned long flags;
> > +	bool mismatch;
> > +
> > +	raw_spin_lock_irqsave(&p->pi_lock, flags);
> > +	if (READ_ONCE(p->__state) & match_state)
> > +		mismatch = false;
> > +	else if (READ_ONCE(p->saved_state) & match_state)
> > +		mismatch = false;
> > +	else
> > +		mismatch = true;
> > +
> > +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> > +	return mismatch;
> > +}
> > +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> > +					bool *wait)
> > +{
> > +	if (READ_ONCE(p->__state) & match_state)
> > +		return true;
> > +	if (READ_ONCE(p->saved_state) & match_state) {
> > +		*wait = true;
> > +		return true;
> > +	}
> > +	return false;
> > +}
> > +#else
> > +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> > +{
> > +	return !(READ_ONCE(p->__state) & match_state);
> > +}
> > +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> > +					bool *wait)
> > +{
> > +	return (READ_ONCE(p->__state) & match_state);
> > +}
> > +#endif
> > +
> >  /*
> >   * wait_task_inactive - wait for a thread to unschedule.
> >   *
> 
> Urgh...
> 
> I've ended up with the below.. I've tried folding it with
> ttwu_state_match() but every attempt so far makes it an unholy mess.
> 
> Now, if only we had proper lock guard then we could drop another few
> lines, but alas.

New day, new chances... How's this? Code-gen doesn't look totally
insane, but then, making sense of an optimizing compiler's output is
always a wee challenge.

---
 kernel/sched/core.c | 55 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a68d1276bab0..d89610fffd23 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3341,6 +3341,35 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+static __always_inline
+int __task_state_match(struct task_struct *p, unsigned int state)
+{
+	if (READ_ONCE(p->__state) & state)
+		return 1;
+
+#ifdef CONFIG_PREEMPT_RT
+	if (READ_ONCE(p->saved_state) & state)
+		return -1;
+#endif
+	return 0;
+}
+
+static __always_inline
+int task_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+	int match;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	match = __task_state_match(p, state);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+#else
+	return __task_state_match(p, state);
+#endif
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -3359,7 +3388,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
  */
 unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
-	int running, queued;
+	int running, queued, match;
 	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
@@ -3385,7 +3414,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		 * is actually now running somewhere else!
 		 */
 		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
+			if (!task_state_match(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -3400,8 +3429,15 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		running = task_on_cpu(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
+		if ((match = __task_state_match(p, match_state))) {
+			/*
+			 * When matching on p->saved_state, consider this task
+			 * still queued so it will wait.
+			 */
+			if (match < 0)
+				queued = 1;
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+		}
 		task_rq_unlock(rq, p, &rf);
 
 		/*
@@ -4003,15 +4039,14 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 static __always_inline
 bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 {
+	int match;
+
 	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
 		WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
 			     state != TASK_RTLOCK_WAIT);
 	}
 
-	if (READ_ONCE(p->__state) & state) {
-		*success = 1;
-		return true;
-	}
+	*success = !!(match = __task_state_match(p, state));
 
 #ifdef CONFIG_PREEMPT_RT
 	/*
@@ -4027,12 +4062,10 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 	 * p::saved_state to TASK_RUNNING so any further tests will
 	 * not result in false positives vs. @success
 	 */
-	if (p->saved_state & state) {
+	if (match < 0)
 		p->saved_state = TASK_RUNNING;
-		*success = 1;
-	}
 #endif
-	return false;
+	return match > 0;
 }
 
 /*

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-05-25 16:52 ` Peter Zijlstra
  2023-05-26  8:05   ` Peter Zijlstra
@ 2023-05-26  8:47   ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-05-26  8:47 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-05-25 18:52:44 [+0200], Peter Zijlstra wrote:
> Urgh...
> 
> I've ended up with the below.. I've tried folding it with
> ttwu_state_match() but every attempt so far makes it an unholy mess.
> 
> Now, if only we had proper lock guard then we could drop another few
> lines, but alas.

perfect, thank you.
Tested the bits.

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-05-26  8:05   ` Peter Zijlstra
@ 2023-05-26 15:13     ` Sebastian Andrzej Siewior
  2023-06-01  9:12       ` Peter Zijlstra
  0 siblings, 1 reply; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-05-26 15:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-05-26 10:05:43 [+0200], Peter Zijlstra wrote:
> New day, new chances... How's this? Code-gen doesn't look totally
> insane, but then, making sense of an optimizing compiler's output is
> always a wee challenge.

Noticed it too late but looks good. Tested, works.

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-05-26 15:13     ` Sebastian Andrzej Siewior
@ 2023-06-01  9:12       ` Peter Zijlstra
  2023-06-02  8:25         ` Peter Zijlstra
  2023-06-05 19:16         ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
  0 siblings, 2 replies; 16+ messages in thread
From: Peter Zijlstra @ 2023-06-01  9:12 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Fri, May 26, 2023 at 05:13:35PM +0200, Sebastian Andrzej Siewior wrote:
> On 2023-05-26 10:05:43 [+0200], Peter Zijlstra wrote:
> > New day, new chances... How's this? Code-gen doesn't look totally
> > insane, but then, making sense of an optimizing compiler's output is
> > always a wee challenge.
> 
> Noticed it too late but looks good. Tested, works.

Excellent; full patch below. Will go stick in tip/sched/core soonish.

---
Subject: sched: Consider task_struct::saved_state in wait_task_inactive()
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed May 31 16:39:07 CEST 2023

With the introduction of task_struct::saved_state in commit
5f220be21418 ("sched/wakeup: Prepare for RT sleeping spin/rwlocks")
matching the task state has gotten more complicated. That same commit
changed try_to_wake_up() to consider both states, but
wait_task_inactive() has been neglected.

Sebastian noted that the wait_task_inactive() usage in
ptrace_check_attach() can misbehave when ptrace_stop() is blocked on
the tasklist_lock after it sets TASK_TRACED.

Therefore extract a common helper from ttwu_state_match() and use that
to teach wait_task_inactive() about the PREEMPT_RT locks.
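
The common helper below uses a tri-state return: 1 when @state is visible in
p->__state, -1 (PREEMPT_RT only) when it is parked in p->saved_state, and 0
when there is no match. A hypothetical caller, purely to illustrate the
contract:

/* Hypothetical helper, for illustration of the tri-state contract only. */
static bool task_in_state_somewhere(struct task_struct *p, unsigned int state)
{
	int match = __task_state_match(p, state);

	if (match > 0)
		return true;	/* @state is visible in p->__state */
	if (match < 0)
		return true;	/* PREEMPT_RT: @state parked in p->saved_state;
				 * the task still has to reach schedule() */
	return false;		/* no match in either field */
}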

Originally-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/sched/core.c |   59 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 11 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3341,6 +3341,39 @@ int migrate_swap(struct task_struct *cur
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+static __always_inline
+int __task_state_match(struct task_struct *p, unsigned int state)
+{
+	if (READ_ONCE(p->__state) & state)
+		return 1;
+
+#ifdef CONFIG_PREEMPT_RT
+	if (READ_ONCE(p->saved_state) & state)
+		return -1;
+#endif
+	return 0;
+}
+
+static __always_inline
+int task_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+	int match;
+
+	/*
+	 * Serialize against current_save_and_set_rtlock_wait_state() and
+	 * current_restore_rtlock_saved_state().
+	 */
+	raw_spin_lock_irq(&p->pi_lock);
+	match = __task_state_match(p, state);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+#else
+	return __task_state_match(p, state);
+#endif
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -3359,7 +3392,7 @@ int migrate_swap(struct task_struct *cur
  */
 unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
-	int running, queued;
+	int running, queued, match;
 	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
@@ -3385,7 +3418,7 @@ unsigned long wait_task_inactive(struct
 		 * is actually now running somewhere else!
 		 */
 		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
+			if (!task_state_match(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -3400,8 +3433,15 @@ unsigned long wait_task_inactive(struct
 		running = task_on_cpu(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
+		if ((match = __task_state_match(p, match_state))) {
+			/*
+			 * When matching on p->saved_state, consider this task
+			 * still queued so it will wait.
+			 */
+			if (match < 0)
+				queued = 1;
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+		}
 		task_rq_unlock(rq, p, &rf);
 
 		/*
@@ -4003,15 +4043,14 @@ static void ttwu_queue(struct task_struc
 static __always_inline
 bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 {
+	int match;
+
 	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
 		WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
 			     state != TASK_RTLOCK_WAIT);
 	}
 
-	if (READ_ONCE(p->__state) & state) {
-		*success = 1;
-		return true;
-	}
+	*success = !!(match = __task_state_match(p, state));
 
 #ifdef CONFIG_PREEMPT_RT
 	/*
@@ -4027,12 +4066,10 @@ bool ttwu_state_match(struct task_struct
 	 * p::saved_state to TASK_RUNNING so any further tests will
 	 * not result in false positives vs. @success
 	 */
-	if (p->saved_state & state) {
+	if (match < 0)
 		p->saved_state = TASK_RUNNING;
-		*success = 1;
-	}
 #endif
-	return false;
+	return match > 0;
 }
 
 /*

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-06-01  9:12       ` Peter Zijlstra
@ 2023-06-02  8:25         ` Peter Zijlstra
  2023-06-02 10:37           ` Peter Zijlstra
  2023-06-05 19:16         ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
  1 sibling, 1 reply; 16+ messages in thread
From: Peter Zijlstra @ 2023-06-02  8:25 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Thu, Jun 01, 2023 at 11:12:34AM +0200, Peter Zijlstra wrote:
> On Fri, May 26, 2023 at 05:13:35PM +0200, Sebastian Andrzej Siewior wrote:
> > On 2023-05-26 10:05:43 [+0200], Peter Zijlstra wrote:
> > > New day, new chances... How's this? Code-gen doesn't look totally
> > > insane, but then, making sense of an optimizing compiler's output is
> > > always a wee challenge.
> > 
> > Noticed it too late but looks good. Tested, works.
> 
> Excellent; full patch below. Will go stick in tip/sched/core soonish.

Urgh, so the build robot kicked me for breaking !SMP. And that made me
realize that UP wait_task_inactive() is broken on PREEMPT_RT.

Let me figure out what best to do about that..

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-06-02  8:25         ` Peter Zijlstra
@ 2023-06-02 10:37           ` Peter Zijlstra
  2023-06-02 10:49             ` Sebastian Andrzej Siewior
  2023-06-05 16:15             ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 16+ messages in thread
From: Peter Zijlstra @ 2023-06-02 10:37 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Fri, Jun 02, 2023 at 10:25:03AM +0200, Peter Zijlstra wrote:
> On Thu, Jun 01, 2023 at 11:12:34AM +0200, Peter Zijlstra wrote:
> > On Fri, May 26, 2023 at 05:13:35PM +0200, Sebastian Andrzej Siewior wrote:
> > > On 2023-05-26 10:05:43 [+0200], Peter Zijlstra wrote:
> > > > New day, new chances... How's this? Code-gen doesn't look totally
> > > > insane, but then, making sense of an optimizing compiler's output is
> > > > always a wee challenge.
> > > 
> > > Noticed it too late but looks good. Tested, works.
> > 
> > Excellent; full patch below. Will go stick in tip/sched/core soonish.
> 
> Urgh, so the build robot kicked me for breaking !SMP. And that made me realize
> that UP wait_task_inactive() is broken on PREEMPT_RT.
> 
> Let me figure out what best to do about that..

I'll stick this in front -- see what happens ;-)

---
Subject: sched: Unconditionally use full-fat wait_task_inactive()
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Jun  2 10:42:53 CEST 2023

While modifying wait_task_inactive() for PREEMPT_RT, the build robot
noted that UP got broken. This led to an audit of the UP implementation
of wait_task_inactive().

It looks like the UP implementation is also broken for PREEMPT;
consider task_current_syscall() getting preempted between the two
calls to wait_task_inactive().
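
For illustration, the preemption-sensitive pattern is roughly the following
(an abridged sketch of task_current_syscall() from lib/syscall.c; the second
wait_task_inactive() must return the same switch count to prove the target
never ran in between, which a stub that always returns 1 cannot do):

int task_current_syscall(struct task_struct *target, struct syscall_info *info)
{
	unsigned long ncsw;
	unsigned int state;

	if (target == current)
		return collect_syscall(target, info);

	state = READ_ONCE(target->__state);
	if (unlikely(!state))
		return -EAGAIN;	/* target is running */

	ncsw = wait_task_inactive(target, state);
	if (unlikely(!ncsw) ||
	    unlikely(collect_syscall(target, info)) ||
	    unlikely(wait_task_inactive(target, state) != ncsw))
		return -EAGAIN;	/* target may have run in between */

	return 0;
}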

Therefore move the wait_task_inactive() implementation out of
CONFIG_SMP and unconditionally use it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/sched.h |    7 -
 kernel/sched/core.c   |  216 +++++++++++++++++++++++++-------------------------
 2 files changed, 110 insertions(+), 113 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,15 +2006,12 @@ static __always_inline void scheduler_ip
 	 */
 	preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-	return 1;
-}
 #endif
 
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
+
 /*
  * Set thread flags in other task's structures.
  * See asm/thread_info.h for TIF_xxxx flags available:
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2213,6 +2213,114 @@ void check_preempt_curr(struct rq *rq, s
 		rq_clock_skip_update(rq);
 }
 
+/*
+ * wait_task_inactive - wait for a thread to unschedule.
+ *
+ * Wait for the thread to block in any of the states set in @match_state.
+ * If it changes, i.e. @p might have woken up, then return zero.  When we
+ * succeed in waiting for @p to be off its CPU, we return a positive number
+ * (its total switch count).  If a second call a short while later returns the
+ * same number, the caller can be sure that @p has remained unscheduled the
+ * whole time.
+ *
+ * The caller must ensure that the task *will* unschedule sometime soon,
+ * else this function might spin for a *long* time. This function can't
+ * be called with interrupts off, or it may introduce deadlock with
+ * smp_call_function() if an IPI is sent by the same process we are
+ * waiting to become inactive.
+ */
+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
+{
+	int running, queued;
+	struct rq_flags rf;
+	unsigned long ncsw;
+	struct rq *rq;
+
+	for (;;) {
+		/*
+		 * We do the initial early heuristics without holding
+		 * any task-queue locks at all. We'll only try to get
+		 * the runqueue lock when things look like they will
+		 * work out!
+		 */
+		rq = task_rq(p);
+
+		/*
+		 * If the task is actively running on another CPU
+		 * still, just relax and busy-wait without holding
+		 * any locks.
+		 *
+		 * NOTE! Since we don't hold any locks, it's not
+		 * even sure that "rq" stays as the right runqueue!
+		 * But we don't care, since "task_on_cpu()" will
+		 * return false if the runqueue has changed and p
+		 * is actually now running somewhere else!
+		 */
+		while (task_on_cpu(rq, p)) {
+			if (!(READ_ONCE(p->__state) & match_state))
+				return 0;
+			cpu_relax();
+		}
+
+		/*
+		 * Ok, time to look more closely! We need the rq
+		 * lock now, to be *sure*. If we're wrong, we'll
+		 * just go back and repeat.
+		 */
+		rq = task_rq_lock(p, &rf);
+		trace_sched_wait_task(p);
+		running = task_on_cpu(rq, p);
+		queued = task_on_rq_queued(p);
+		ncsw = 0;
+		if (READ_ONCE(p->__state) & match_state)
+			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+		task_rq_unlock(rq, p, &rf);
+
+		/*
+		 * If it changed from the expected state, bail out now.
+		 */
+		if (unlikely(!ncsw))
+			break;
+
+		/*
+		 * Was it really running after all now that we
+		 * checked with the proper locks actually held?
+		 *
+		 * Oops. Go back and try again..
+		 */
+		if (unlikely(running)) {
+			cpu_relax();
+			continue;
+		}
+
+		/*
+		 * It's not enough that it's not actively running,
+		 * it must be off the runqueue _entirely_, and not
+		 * preempted!
+		 *
+		 * So if it was still runnable (but just not actively
+		 * running right now), it's preempted, and we should
+		 * yield - it could be a while.
+		 */
+		if (unlikely(queued)) {
+			ktime_t to = NSEC_PER_SEC / HZ;
+
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+			continue;
+		}
+
+		/*
+		 * Ahh, all good. It wasn't running, and it wasn't
+		 * runnable, which means that it will never become
+		 * running in the future either. We're all done!
+		 */
+		break;
+	}
+
+	return ncsw;
+}
+
 #ifdef CONFIG_SMP
 
 static void
@@ -3341,114 +3449,6 @@ int migrate_swap(struct task_struct *cur
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-/*
- * wait_task_inactive - wait for a thread to unschedule.
- *
- * Wait for the thread to block in any of the states set in @match_state.
- * If it changes, i.e. @p might have woken up, then return zero.  When we
- * succeed in waiting for @p to be off its CPU, we return a positive number
- * (its total switch count).  If a second call a short while later returns the
- * same number, the caller can be sure that @p has remained unscheduled the
- * whole time.
- *
- * The caller must ensure that the task *will* unschedule sometime soon,
- * else this function might spin for a *long* time. This function can't
- * be called with interrupts off, or it may introduce deadlock with
- * smp_call_function() if an IPI is sent by the same process we are
- * waiting to become inactive.
- */
-unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-	int running, queued;
-	struct rq_flags rf;
-	unsigned long ncsw;
-	struct rq *rq;
-
-	for (;;) {
-		/*
-		 * We do the initial early heuristics without holding
-		 * any task-queue locks at all. We'll only try to get
-		 * the runqueue lock when things look like they will
-		 * work out!
-		 */
-		rq = task_rq(p);
-
-		/*
-		 * If the task is actively running on another CPU
-		 * still, just relax and busy-wait without holding
-		 * any locks.
-		 *
-		 * NOTE! Since we don't hold any locks, it's not
-		 * even sure that "rq" stays as the right runqueue!
-		 * But we don't care, since "task_on_cpu()" will
-		 * return false if the runqueue has changed and p
-		 * is actually now running somewhere else!
-		 */
-		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
-				return 0;
-			cpu_relax();
-		}
-
-		/*
-		 * Ok, time to look more closely! We need the rq
-		 * lock now, to be *sure*. If we're wrong, we'll
-		 * just go back and repeat.
-		 */
-		rq = task_rq_lock(p, &rf);
-		trace_sched_wait_task(p);
-		running = task_on_cpu(rq, p);
-		queued = task_on_rq_queued(p);
-		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
-			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, p, &rf);
-
-		/*
-		 * If it changed from the expected state, bail out now.
-		 */
-		if (unlikely(!ncsw))
-			break;
-
-		/*
-		 * Was it really running after all now that we
-		 * checked with the proper locks actually held?
-		 *
-		 * Oops. Go back and try again..
-		 */
-		if (unlikely(running)) {
-			cpu_relax();
-			continue;
-		}
-
-		/*
-		 * It's not enough that it's not actively running,
-		 * it must be off the runqueue _entirely_, and not
-		 * preempted!
-		 *
-		 * So if it was still runnable (but just not actively
-		 * running right now), it's preempted, and we should
-		 * yield - it could be a while.
-		 */
-		if (unlikely(queued)) {
-			ktime_t to = NSEC_PER_SEC / HZ;
-
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
-			continue;
-		}
-
-		/*
-		 * Ahh, all good. It wasn't running, and it wasn't
-		 * runnable, which means that it will never become
-		 * running in the future either. We're all done!
-		 */
-		break;
-	}
-
-	return ncsw;
-}
-
 /***
  * kick_process - kick a running thread to enter/exit the kernel
  * @p: the to-be-kicked thread

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-06-02 10:37           ` Peter Zijlstra
@ 2023-06-02 10:49             ` Sebastian Andrzej Siewior
  2023-06-02 11:18               ` Peter Zijlstra
  2023-06-05 16:15             ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-06-02 10:49 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-06-02 12:37:31 [+0200], Peter Zijlstra wrote:
> ---
> Subject: sched: Unconditionally use full-fat wait_task_inactive()
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Fri Jun  2 10:42:53 CEST 2023
> 
> While modifying wait_task_inactive() for PREEMPT_RT, the build robot
> noted that UP got broken. This led to an audit of the UP implementation
> of wait_task_inactive().
> 
> It looks like the UP implementation is also broken for PREEMPT;

If UP is broken for PREEMPT, shouldn't it get a Fixes: or stable tag?

Either way, I will try to stuff this in RT today and give feedback. I
actually never booted this on UP, will try to do so today…

Sebastian

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-06-02 10:49             ` Sebastian Andrzej Siewior
@ 2023-06-02 11:18               ` Peter Zijlstra
  0 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2023-06-02 11:18 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On Fri, Jun 02, 2023 at 12:49:58PM +0200, Sebastian Andrzej Siewior wrote:
> On 2023-06-02 12:37:31 [+0200], Peter Zijlstra wrote:
> > ---
> > Subject: sched: Unconditionally use full-fat wait_task_inactive()
> > From: Peter Zijlstra <peterz@infradead.org>
> > Date: Fri Jun  2 10:42:53 CEST 2023
> > 
> > While modifying wait_task_inactive() for PREEMPT_RT; the build robot
> > noted that UP got broken. This led to audit and consideration of the
> > UP implementation of wait_task_inactive().
> > 
> > It looks like the UP implementation is also broken for PREEMPT;
> 
> If UP is broken for PREEMPT, shouldn't it get a fixes or stable tag?

It has been broken *forever*; I don't think we need to 'rush' a fix.
Also, I don't think anybody actually uses a UP+PREEMPT kernel much, but
what do I know.

* Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().
  2023-06-02 10:37           ` Peter Zijlstra
  2023-06-02 10:49             ` Sebastian Andrzej Siewior
@ 2023-06-05 16:15             ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 16+ messages in thread
From: Sebastian Andrzej Siewior @ 2023-06-05 16:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Ben Segall, Daniel Bristot de Oliveira,
	Dietmar Eggemann, Ingo Molnar, Juri Lelli, Mel Gorman,
	Steven Rostedt, Thomas Gleixner, Valentin Schneider,
	Vincent Guittot

On 2023-06-02 12:37:31 [+0200], Peter Zijlstra wrote:
> I'll stick this in front -- see what happens ;-)

Tested this with the previous one. All good.

Sebastian

* [tip: sched/core] sched: Consider task_struct::saved_state in wait_task_inactive()
  2023-06-01  9:12       ` Peter Zijlstra
  2023-06-02  8:25         ` Peter Zijlstra
@ 2023-06-05 19:16         ` tip-bot2 for Peter Zijlstra
  1 sibling, 0 replies; 16+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-06-05 19:16 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Sebastian Andrzej Siewior, Peter Zijlstra (Intel), x86, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     1c06918788e8ae6e69e4381a2806617312922524
Gitweb:        https://git.kernel.org/tip/1c06918788e8ae6e69e4381a2806617312922524
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 31 May 2023 16:39:07 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 05 Jun 2023 21:11:03 +02:00

sched: Consider task_struct::saved_state in wait_task_inactive()

With the introduction of task_struct::saved_state in commit
5f220be21418 ("sched/wakeup: Prepare for RT sleeping spin/rwlocks")
matching the task state has gotten more complicated. That same commit
changed try_to_wake_up() to consider both states, but
wait_task_inactive() has been neglected.

Sebastian noted that the wait_task_inactive() usage in
ptrace_check_attach() can misbehave when ptrace_stop() is blocked on
the tasklist_lock after it sets TASK_TRACED.

Therefore extract a common helper from ttwu_state_match() and use that
to teach wait_task_inactive() about the PREEMPT_RT locks.

Originally-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20230601091234.GW83892@hirez.programming.kicks-ass.net
---
 kernel/sched/core.c | 59 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 810cf7d..ac38225 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2213,6 +2213,39 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq_clock_skip_update(rq);
 }
 
+static __always_inline
+int __task_state_match(struct task_struct *p, unsigned int state)
+{
+	if (READ_ONCE(p->__state) & state)
+		return 1;
+
+#ifdef CONFIG_PREEMPT_RT
+	if (READ_ONCE(p->saved_state) & state)
+		return -1;
+#endif
+	return 0;
+}
+
+static __always_inline
+int task_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+	int match;
+
+	/*
+	 * Serialize against current_save_and_set_rtlock_wait_state() and
+	 * current_restore_rtlock_saved_state().
+	 */
+	raw_spin_lock_irq(&p->pi_lock);
+	match = __task_state_match(p, state);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+#else
+	return __task_state_match(p, state);
+#endif
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -2231,7 +2264,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
  */
 unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
-	int running, queued;
+	int running, queued, match;
 	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
@@ -2257,7 +2290,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		 * is actually now running somewhere else!
 		 */
 		while (task_on_cpu(rq, p)) {
-			if (!(READ_ONCE(p->__state) & match_state))
+			if (!task_state_match(p, match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -2272,8 +2305,15 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 		running = task_on_cpu(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (READ_ONCE(p->__state) & match_state)
+		if ((match = __task_state_match(p, match_state))) {
+			/*
+			 * When matching on p->saved_state, consider this task
+			 * still queued so it will wait.
+			 */
+			if (match < 0)
+				queued = 1;
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+		}
 		task_rq_unlock(rq, p, &rf);
 
 		/*
@@ -4003,15 +4043,14 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 static __always_inline
 bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 {
+	int match;
+
 	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
 		WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
 			     state != TASK_RTLOCK_WAIT);
 	}
 
-	if (READ_ONCE(p->__state) & state) {
-		*success = 1;
-		return true;
-	}
+	*success = !!(match = __task_state_match(p, state));
 
 #ifdef CONFIG_PREEMPT_RT
 	/*
@@ -4027,12 +4066,10 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 	 * p::saved_state to TASK_RUNNING so any further tests will
 	 * not result in false positives vs. @success
 	 */
-	if (p->saved_state & state) {
+	if (match < 0)
 		p->saved_state = TASK_RUNNING;
-		*success = 1;
-	}
 #endif
-	return false;
+	return match > 0;
 }
 
 /*

end of thread

Thread overview: 16+ messages
2023-02-17 14:53 [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive() Sebastian Andrzej Siewior
2023-02-22 13:36 ` Peter Zijlstra
2023-02-23 16:53   ` Sebastian Andrzej Siewior
2023-03-29 13:33     ` Sebastian Andrzej Siewior
2023-05-24 14:59       ` Sebastian Andrzej Siewior
2023-05-25 16:52 ` Peter Zijlstra
2023-05-26  8:05   ` Peter Zijlstra
2023-05-26 15:13     ` Sebastian Andrzej Siewior
2023-06-01  9:12       ` Peter Zijlstra
2023-06-02  8:25         ` Peter Zijlstra
2023-06-02 10:37           ` Peter Zijlstra
2023-06-02 10:49             ` Sebastian Andrzej Siewior
2023-06-02 11:18               ` Peter Zijlstra
2023-06-05 16:15             ` Sebastian Andrzej Siewior
2023-06-05 19:16         ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-05-26  8:47   ` [PATCH] " Sebastian Andrzej Siewior
