All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: rjw@rjwysocki.net, oleg@redhat.com, mingo@kernel.org,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, mgorman@suse.de,
	Will Deacon <will@kernel.org>
Cc: linux-kernel@vger.kernel.org, tj@kernel.org, linux-pm@vger.kernel.org
Subject: Re: [PATCH v3 6/6] freezer,sched: Rewrite core freezer logic
Date: Mon, 18 Oct 2021 15:36:50 +0200	[thread overview]
Message-ID: <YW14cgrrRWSAzmYn@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20211009101445.151888246@infradead.org>

On Sat, Oct 09, 2021 at 12:08:00PM +0200, Peter Zijlstra wrote:

> +static inline unsigned int __can_freeze(struct task_struct *p)
> +{
> +	unsigned int state = READ_ONCE(p->__state);
> +
> +	if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
> +		return 0;
> +
> +	/*
> +	 * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
> +	 * can suffer spurious wakeups.
> +	 */
> +	if (state & TASK_FREEZABLE)
> +		WARN_ON_ONCE(!(state & TASK_NORMAL));
> +
> +#ifdef CONFIG_LOCKDEP
> +	/*
> +	 * It's dangerous to freeze with locks held; there be dragons there.
> +	 */
> +	if (!(state & __TASK_FREEZABLE_UNSAFE))
> +		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
> +#endif
> +
> +	return TASK_FROZEN;
> +}
> +
> +/* See task_cond_set_special_state(); serializes against ttwu() */
> +static bool __freeze_task(struct task_struct *p)
> +{
> +	return task_cond_set_special_state(p, __can_freeze(p));
> +}

Will found an issue with this, notably task_cond_set_special_state() only
takes ->pi_lock and as such doesn't serialize against __schedule(),
which then yields the following fun scenario:


	__schedule()					__freeze_task()


	prev_state = READ_ONCE(prev->__state); // INTERRUPTIBLE

							task_cond_set_special_state()
							  ...
							  WRITE_ONCE(prev->__state, TASK_FROZEN);

	if (signal_pending_state(prev_state, prev)) // SIGPENDING
	  WRITE_ONCE(prev->__state, TASK_RUNNING)




And *whoopsie*, the freezer thinks we're frozen, but we're back in the game.


AFAICT the below, which uses the brand-spanking-new task_call_func()
(currently sitting in tip/sched/core) to also serialize against
rq->lock, should avoid this scenario.


--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -233,25 +233,6 @@ struct task_group;
 	} while (0)
 
 /*
- * task_cond_set_special_state() is a cmpxchg like operation on task->state.
- *
- * This operation isn't safe in general and should only be used to transform
- * one (special) blocked state into another, such as:
- *   TASK_STOPPED <-> TASK_FROZEN.
- */
-#define task_cond_set_special_state(task, cond_state)			\
-	({								\
-		struct task_struct *__p = (task);			\
-		unsigned long __flags; /* may shadow */			\
-		unsigned int __state;					\
-		raw_spin_lock_irqsave(&__p->pi_lock, __flags);		\
-		if ((__state = (cond_state)))				\
-			WRITE_ONCE(__p->__state, __state);		\
-		raw_spin_unlock_irqrestore(&__p->pi_lock, __flags);	\
-		!!__state;						\
-	})
-
-/*
  * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
  *
  * RT's spin/rwlock substitutions are state preserving. The state of the
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -101,7 +101,7 @@ static void fake_signal_wake_up(struct t
 	}
 }
 
-static inline unsigned int __can_freeze(struct task_struct *p)
+static int __set_task_frozen(struct task_struct *p, void *arg)
 {
 	unsigned int state = READ_ONCE(p->__state);
 
@@ -123,13 +123,14 @@ static inline unsigned int __can_freeze(
 		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
 #endif
 
+	WRITE_ONCE(p->__state, TASK_FROZEN);
 	return TASK_FROZEN;
 }
 
-/* See task_cond_set_special_state(); serializes against ttwu() */
 static bool __freeze_task(struct task_struct *p)
 {
-	return task_cond_set_special_state(p, __can_freeze(p));
+	/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
+	return task_call_func(p, __set_task_frozen, NULL);
 }
 
 /**
@@ -169,7 +170,7 @@ bool freeze_task(struct task_struct *p)
  * reflects that and the below will refuse to restore the special state and
  * instead issue the wakeup.
  */
-static inline unsigned int __thaw_special(struct task_struct *p)
+static int __set_task_special(struct task_struct *p, void *arg)
 {
 	unsigned int state = 0;
 
@@ -188,6 +189,9 @@ static inline unsigned int __thaw_specia
 		state = TASK_STOPPED;
 	}
 
+	if (state)
+		WRITE_ONCE(p->__state, state);
+
 	return state;
 }
 
@@ -200,7 +204,8 @@ void __thaw_task(struct task_struct *p)
 		goto unlock;
 
 	if (lock_task_sighand(p, &flags2)) {
-		bool ret = task_cond_set_special_state(p, __thaw_special(p));
+		/* TASK_FROZEN -> TASK_{STOPPED,TRACED} */
+		bool ret = task_call_func(p, __set_task_special, NULL);
 		unlock_task_sighand(p, &flags2);
 		if (ret)
 			goto unlock;
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -193,6 +193,17 @@ static bool looks_like_a_spurious_pid(st
 	return true;
 }
 
+static int __set_task_traced(struct task_struct *task, void *arg)
+{
+	unsigned int *state = arg;
+
+	if (!(task->__state & __TASK_TRACED))
+		return 0;
+
+	WRITE_ONCE(task->__state, *state);
+	return *state;
+}
+
 /* Ensure that nothing can wake it up, even SIGKILL */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
@@ -205,10 +216,12 @@ static bool ptrace_freeze_traced(struct
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+		unsigned int state = __TASK_TRACED;
+
 		task->ptrace &= ~PT_STOPPED_MASK;
 		task->ptrace |= PT_STOPPED;
 		/* *TASK_TRACED -> __TASK_TRACED */
-		task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * __TASK_TRACED);
+		task_call_func(task, __set_task_traced, &state);
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -233,9 +246,11 @@ static void ptrace_unfreeze_traced(struc
 			task->ptrace &= ~PT_STOPPED_MASK;
 			wake_up_state(task, __TASK_TRACED);
 		} else {
+			unsigned int state = TASK_TRACED;
+
 			task->ptrace |= PT_STOPPED_MASK;
 			/* *TASK_TRACED -> TASK_TRACED */
-			task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * TASK_TRACED);
+			task_call_func(task, __set_task_traced, &state);
 		}
 	}
 	spin_unlock_irq(&task->sighand->siglock);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3181,7 +3181,7 @@ int migrate_swap(struct task_struct *cur
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-static inline __wti_match(struct task_struct *p, unsigned int match_state)
+static inline bool __wti_match(struct task_struct *p, unsigned int match_state)
 {
 	unsigned int state = READ_ONCE(p->__state);
 

      reply	other threads:[~2021-10-18 14:10 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-09 10:07 [PATCH v3 0/6] Freezer rewrite Peter Zijlstra
2021-10-09 10:07 ` [PATCH v3 1/6] freezer: Have {,un}lock_system_sleep() save/restore flags Peter Zijlstra
2021-10-14  8:58   ` Will Deacon
2021-10-09 10:07 ` [PATCH v3 2/6] freezer,umh: Clean up freezer/initrd interaction Peter Zijlstra
2021-10-14  9:13   ` Will Deacon
2021-10-09 10:07 ` [PATCH v3 3/6] ptrace: Order and comment PT_flags Peter Zijlstra
2021-10-14  9:31   ` Will Deacon
2021-10-14 14:27     ` Peter Zijlstra
2021-10-09 10:07 ` [PATCH v3 4/6] ptrace: Track __TASK_TRACED state in p->ptrace Peter Zijlstra
2021-10-09 10:07 ` [PATCH v3 5/6] sched,ptrace: Avoid relying on __TASK_TRACED | __TASK_STOPPED Peter Zijlstra
2021-10-09 10:08 ` [PATCH v3 6/6] freezer,sched: Rewrite core freezer logic Peter Zijlstra
2021-10-18 13:36   ` Peter Zijlstra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YW14cgrrRWSAzmYn@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=rjw@rjwysocki.net \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=will@kernel.org \
    --subject='Re: [PATCH v3 6/6] freezer,sched: Rewrite core freezer logic' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.