All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Oleg Nesterov <oleg@redhat.com>
Cc: rjw@rjwysocki.net, mingo@kernel.org, vincent.guittot@linaro.org,
	dietmar.eggemann@arm.com, rostedt@goodmis.org, mgorman@suse.de,
	ebiederm@xmission.com, bigeasy@linutronix.de,
	Will Deacon <will@kernel.org>,
	linux-kernel@vger.kernel.org, tj@kernel.org,
	linux-pm@vger.kernel.org
Subject: Re: [PATCH 2/5] sched,ptrace: Fix ptrace_check_attach() vs PREEMPT_RT
Date: Fri, 15 Apr 2022 00:45:26 +0200	[thread overview]
Message-ID: <YlikBjA3kL3XEQP5@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20220414183433.GC32752@redhat.com>

On Thu, Apr 14, 2022 at 08:34:33PM +0200, Oleg Nesterov wrote:
> On 04/14, Oleg Nesterov wrote:
> >
> > OK, let me think about it. Thanks!
> 
> So. what do you think about the patch below?

Might just work, will have to look at it again though ;-) Comments
below.

> If it can work, then 1/5 needs some changes, I think. In particular,
> it should not introduce JOBCTL_TRACED_FROZEN until 5/5, and perhaps

That TRACED_FROZEN was to distinguish the TASK_TRACED and __TASK_TRACED
state, and isn't related to the freezer.

> we can avoid this flag altogether...
> 
> This is how ptrace_check_attach() looks with the patch applied:
> 
> 	static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
> 	{
> 		int traced;
> 		/*
> 		 * We take the read lock around doing both checks to close a
> 		 * possible race where someone else attaches or detaches our
> 		 * natural child.
> 		 */
> 		read_lock(&tasklist_lock);
> 		traced = child->ptrace && child->parent == current;
> 		read_unlock(&tasklist_lock);
> 
> 		if (!traced)
> 			return -ESRCH;

The thing being, that if it is our ptrace child, it won't be going away
since we're running this code and not ptrace_detach().  Right?

I failed to realize this earlier and I think that's part of why my
solution is somewhat over complicated.

> 		WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
> 		if (ignore_state)
> 			return 0;
> 
> 		for (;;) {
> 			if (fatal_signal_pending(current))
> 				return -EINTR;

What if signal_wake_up(.resume=true) happens here? In that case we miss
the fatal pending, and task state isn't changed yet so we'll happily go
sleep.

> 			set_current_state(TASK_KILLABLE);
> 			if (!(READ_ONCE(child->jobctl) & JOBCTL_TRACED)) {

  TRACED_XXX ?

> 				__set_current_state(TASK_RUNNING);
> 				break;
> 			}
> 			schedule();
> 		}

That is, shouldn't we write this like:

		for (;;) {
			set_current_state(TASK_KILLABLE);
			if (fatal_signal_pending(current)) {
				ret = -EINTR;
				break;
			}
			if (!(READ_ONCE(current->jobctl) & JOBCTL_TRACED_XXX)) {
				ret = 0;
				break;
			}
			schedule();
		}
		__set_current_state(TASK_RUNNING);
		if (ret)
			return ret;

> 		if (!wait_task_inactive(child, TASK_TRACED) ||
> 		    !ptrace_freeze_traced(child))
> 			return -ESRCH;
> 
> 		return 0;
> 	}
> 
> Oleg.
> ---
> 
> diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
> index ec8b312f7506..1b5a57048e13 100644
> --- a/include/linux/sched/jobctl.h
> +++ b/include/linux/sched/jobctl.h
> @@ -22,7 +22,8 @@ struct task_struct;
>  
>  #define JOBCTL_STOPPED_BIT	24
>  #define JOBCTL_TRACED_BIT	25
> -#define JOBCTL_TRACED_FROZEN_BIT 26
> +#define JOBCTL_TRACED_XXX_BIT	25
> +#define JOBCTL_TRACED_FROZEN_BIT 27
>  
>  #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
>  #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
> @@ -35,6 +36,7 @@ struct task_struct;
>  
>  #define JOBCTL_STOPPED		(1UL << JOBCTL_STOPPED_BIT)
>  #define JOBCTL_TRACED		(1UL << JOBCTL_TRACED_BIT)
> +#define JOBCTL_TRACED_XXX	(1UL << JOBCTL_TRACED_XXX_BIT)
>  #define JOBCTL_TRACED_FROZEN	(1UL << JOBCTL_TRACED_FROZEN_BIT)
>  
>  #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 626f96d275c7..5a03ae5cb0c0 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -193,20 +193,22 @@ static bool looks_like_a_spurious_pid(struct task_struct *task)
>   */
>  static bool ptrace_freeze_traced(struct task_struct *task)
>  {
> +	unsigned long flags;
>  	bool ret = false;
>  
>  	/* Lockless, nobody but us can set this flag */
>  	if (task->jobctl & JOBCTL_LISTENING)
>  		return ret;
> +	if (!lock_task_sighand(task, &flags))
> +		return ret;
>  
> -	spin_lock_irq(&task->sighand->siglock);
>  	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
>  	    !__fatal_signal_pending(task)) {
>  		task->jobctl |= JOBCTL_TRACED_FROZEN;
>  		WRITE_ONCE(task->__state, __TASK_TRACED);
>  		ret = true;
>  	}

I would feel much better if this were still a task_func_call()
validating !->on_rq && !->on_cpu.

> -	spin_unlock_irq(&task->sighand->siglock);
> +	unlock_task_sighand(task, &flags);
>  
>  	return ret;
>  }
> @@ -253,40 +255,39 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
>   */
>  static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
>  {
> -	int ret = -ESRCH;
> -
> +	int traced;
>  	/*
>  	 * We take the read lock around doing both checks to close a
> -	 * possible race where someone else was tracing our child and
> -	 * detached between these two checks.  After this locked check,
> -	 * we are sure that this is our traced child and that can only
> -	 * be changed by us so it's not changing right after this.
> +	 * possible race where someone else attaches or detaches our
> +	 * natural child.
>  	 */
>  	read_lock(&tasklist_lock);
> -	if (child->ptrace && child->parent == current) {
> -		WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
> -		/*
> -		 * child->sighand can't be NULL, release_task()
> -		 * does ptrace_unlink() before __exit_signal().
> -		 */
> -		if (ignore_state || ptrace_freeze_traced(child))
> -			ret = 0;
> -	}
> +	traced = child->ptrace && child->parent == current;
>  	read_unlock(&tasklist_lock);
>  
> -	if (!ret && !ignore_state) {
> -		if (!wait_task_inactive(child, __TASK_TRACED)) {
> -			/*
> -			 * This can only happen if may_ptrace_stop() fails and
> -			 * ptrace_stop() changes ->state back to TASK_RUNNING,
> -			 * so we should not worry about leaking __TASK_TRACED.
> -			 */
> -			WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
> -			ret = -ESRCH;
> +	if (!traced)
> +		return -ESRCH;
> +
> +	WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
> +	if (ignore_state)
> +		return 0;
> +
> +	for (;;) {
> +		if (fatal_signal_pending(current))
> +			return -EINTR;
> +		set_current_state(TASK_KILLABLE);
> +		if (!(READ_ONCE(child->jobctl) & JOBCTL_TRACED)) {
> +			__set_current_state(TASK_RUNNING);
> +			break;
>  		}
> +		schedule();
>  	}
>  
> -	return ret;
> +	if (!wait_task_inactive(child, TASK_TRACED) ||
> +	    !ptrace_freeze_traced(child))
> +		return -ESRCH;
> +
> +	return 0;
>  }
>  
>  static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 0aea3f0a8002..684f0a0e9c71 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -2182,6 +2182,14 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
>  	spin_unlock_irqrestore(&sighand->siglock, flags);
>  }
>  
> +static void clear_traced_xxx(void)
> +{
> +	spin_lock_irq(&current->sighand->siglock);
> +	current->jobctl &= ~JOBCTL_TRACED_XXX;
> +	spin_unlock_irq(&current->sighand->siglock);
> +	wake_up_state(current->parent, TASK_KILLABLE);
> +}
> +
>  /*
>   * This must be called with current->sighand->siglock held.
>   *
> @@ -2220,7 +2228,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
>  	 * schedule() will not sleep if there is a pending signal that
>  	 * can awaken the task.
>  	 */
> -	current->jobctl |= JOBCTL_TRACED;
> +	current->jobctl |= JOBCTL_TRACED | JOBCTL_TRACED_XXX;
>  	set_special_state(TASK_TRACED);
>  
>  	/*
> @@ -2282,6 +2290,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
>  		if (gstop_done && ptrace_reparented(current))
>  			do_notify_parent_cldstop(current, false, why);
>  
> +		clear_traced_xxx();
>  		/*
>  		 * Don't want to allow preemption here, because
>  		 * sys_ptrace() needs this task to be inactive.
> @@ -2296,9 +2305,6 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
>  		cgroup_leave_frozen(true);
>  	} else {
>  		/*
> -		 * By the time we got the lock, our tracer went away.
> -		 * Don't drop the lock yet, another tracer may come.
> -		 *
>  		 * If @gstop_done, the ptracer went away between group stop
>  		 * completion and here.  During detach, it would have set
>  		 * JOBCTL_STOP_PENDING on us and we'll re-enter
> @@ -2307,13 +2313,14 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
>  		 */
>  		if (gstop_done)
>  			do_notify_parent_cldstop(current, false, why);
> +		clear_traced_xxx();
> +		read_unlock(&tasklist_lock);
>  
> -		/* tasklist protects us from ptrace_freeze_traced() */
> +		/* JOBCTL_TRACED_XXX protects us from ptrace_freeze_traced() */

But... TRACED_XXX has just been cleared ?!

>  		__set_current_state(TASK_RUNNING);
>  		read_code = false;
>  		if (clear_code)
>  			exit_code = 0;
> -		read_unlock(&tasklist_lock);
>  	}
>  
>  	/*
> 

  reply	other threads:[~2022-04-14 22:46 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-12 11:44 [PATCH 0/5] ptrace-vs-PREEMPT_RT and freezer rewrite Peter Zijlstra
2022-04-12 11:44 ` [PATCH 1/5] sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state Peter Zijlstra
2022-04-13 13:29   ` Oleg Nesterov
2022-04-13 16:47     ` Peter Zijlstra
2022-04-12 11:44 ` [PATCH 2/5] sched,ptrace: Fix ptrace_check_attach() vs PREEMPT_RT Peter Zijlstra
2022-04-13 13:24   ` Oleg Nesterov
2022-04-13 16:58     ` Peter Zijlstra
2022-04-13 18:57     ` Oleg Nesterov
2022-04-13 18:59       ` Oleg Nesterov
2022-04-13 19:20         ` Peter Zijlstra
2022-04-13 19:56           ` Peter Zijlstra
2022-04-14 11:54             ` Oleg Nesterov
2022-04-14 12:08               ` Oleg Nesterov
2022-04-14 18:34               ` Oleg Nesterov
2022-04-14 22:45                 ` Peter Zijlstra [this message]
2022-04-15 10:16                   ` Oleg Nesterov
2022-04-15 10:57                     ` Oleg Nesterov
2022-04-15 12:01                       ` Peter Zijlstra
2022-04-18 17:01                         ` Oleg Nesterov
2022-04-18 17:19                           ` Oleg Nesterov
2022-04-20 13:17                           ` Peter Zijlstra
2022-04-20 18:03                             ` Oleg Nesterov
2022-04-20 20:54                               ` [RFC][PATCH] ptrace: Don't change __state Eric W. Biederman
2022-04-21  7:21                                 ` Peter Zijlstra
2022-04-21 10:26                                   ` Peter Zijlstra
2022-04-21 10:49                                     ` Oleg Nesterov
2022-04-21 11:50                                       ` Peter Zijlstra
2022-04-21 14:45                                   ` Eric W. Biederman
2022-04-21  9:46                                 ` Oleg Nesterov
2022-04-21 15:01                                   ` Eric W. Biederman
2022-04-21 11:46                                 ` kernel test robot
2022-04-27  0:51                                 ` [ptrace] [confidence: ] 7d3fafb751: BUG:sleeping_function_called_from_invalid_context_at_arch/x86/entry/common.c kernel test robot
2022-04-27  0:51                                   ` kernel test robot
2022-04-20 10:20                       ` [PATCH 2/5] sched,ptrace: Fix ptrace_check_attach() vs PREEMPT_RT Peter Zijlstra
2022-04-20 11:35                         ` Oleg Nesterov
2022-04-15 12:00                     ` Peter Zijlstra
2022-04-15 12:56                       ` Oleg Nesterov
2022-04-12 11:44 ` [PATCH 3/5] freezer: Have {,un}lock_system_sleep() save/restore flags Peter Zijlstra
2022-04-12 11:44 ` [PATCH 4/5] freezer,umh: Clean up freezer/initrd interaction Peter Zijlstra
2022-04-12 11:44 ` [PATCH 5/5] freezer,sched: Rewrite core freezer logic Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YlikBjA3kL3XEQP5@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=dietmar.eggemann@arm.com \
    --cc=ebiederm@xmission.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=rjw@rjwysocki.net \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.