All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mika Penttilä" <mika.penttila@nextfour.com>
To: Oleg Nesterov <oleg@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <asarai@suse.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Attila Fazekas <afazekas@redhat.com>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Jann Horn <jann@thejh.net>, Kees Cook <keescook@chromium.org>,
	Michal Hocko <mhocko@kernel.org>,
	Ulrich Obergfell <uobergfe@redhat.com>,
	<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 1/2] exec: don't wait for zombie threads with cred_guard_mutex held
Date: Mon, 13 Feb 2017 19:27:51 +0200	[thread overview]
Message-ID: <7ce94707-7bbe-a0d5-85a0-b93e73d76e22@nextfour.com> (raw)
In-Reply-To: <20170213141516.GA30233@redhat.com>


On 13.02.2017 16:15, Oleg Nesterov wrote:
> +	retval = de_thread(current);
> +	if (retval)
> +		return retval;
>  
>  	if (N_MAGIC(ex) == OMAGIC) {
>  		unsigned long text_addr, map_size;
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 4223702..79508f7 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -855,13 +855,17 @@ static int load_elf_binary(struct linux_binprm *bprm)
>  	setup_new_exec(bprm);
>  	install_exec_creds(bprm);
>  
> +	retval = de_thread(current);
> +	if (retval)
> +		goto out_free_dentry;
> +
>  	/* Do this so that we can load the interpreter, if need be.  We will
>  	   change some of these later */
>  	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
>  				 executable_stack);
>  	if (retval < 0)
>  		goto out_free_dentry;
> -	
> +
>  	current->mm->start_stack = bprm->p;
>  
>  	/* Now we do a little grungy work by mmapping the ELF image into
> diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
> index d2e36f8..75fd6d8 100644
> --- a/fs/binfmt_elf_fdpic.c
> +++ b/fs/binfmt_elf_fdpic.c
> @@ -430,6 +430,10 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
>  #endif
>  
>  	install_exec_creds(bprm);
> +	retval = de_thread(current);
> +	if (retval)
> +		goto error;
> +
>  	if (create_elf_fdpic_tables(bprm, current->mm,
>  				    &exec_params, &interp_params) < 0)
>  		goto error;
> diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
> index 9b2917a..a0ad9a3 100644
> --- a/fs/binfmt_flat.c
> +++ b/fs/binfmt_flat.c
> @@ -953,6 +953,9 @@ static int load_flat_binary(struct linux_binprm *bprm)
>  	}
>  
>  	install_exec_creds(bprm);
> +	res = de_thread(current);
> +	if (res)
> +		return res;
>  
>  	set_binfmt(&flat_format);
>  
> diff --git a/fs/exec.c b/fs/exec.c
> index e579466..8591c56 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1036,13 +1036,62 @@ static int exec_mmap(struct mm_struct *mm)
>  	return 0;
>  }
>  
> +static int wait_for_notify_count(struct task_struct *tsk, struct signal_struct *sig)
> +{
> +	for (;;) {
> +		if (unlikely(__fatal_signal_pending(tsk)))
> +			goto killed;
> +		set_current_state(TASK_KILLABLE);
> +		if (!sig->notify_count)
> +			break;
> +		schedule();
> +	}
> +	__set_current_state(TASK_RUNNING);
> +	return 0;
> +
> +killed:
> +	/* protects against exit_notify() and __exit_signal() */
> +	read_lock(&tasklist_lock);
> +	sig->group_exit_task = NULL;
> +	sig->notify_count = 0;
> +	read_unlock(&tasklist_lock);
> +	return -EINTR;
> +}
> +
> +/*
> + * Kill all the sub-threads and wait until they all pass exit_notify().
> + */
> +static int kill_sub_threads(struct task_struct *tsk)
> +{
> +	struct signal_struct *sig = tsk->signal;
> +	int err = -EINTR;
> +
> +	if (thread_group_empty(tsk))
> +		return 0;
> +
> +	read_lock(&tasklist_lock);
> +	spin_lock_irq(&tsk->sighand->siglock);
> +	if (!signal_group_exit(sig)) {
> +		sig->group_exit_task = tsk;
> +		sig->notify_count = -zap_other_threads(tsk);
> +		err = 0;
> +	}
> +	spin_unlock_irq(&tsk->sighand->siglock);
> +	read_unlock(&tasklist_lock);
> +
> +	if (!err)
> +		err = wait_for_notify_count(tsk, sig);
> +	return err;
> +
> +}
> +
>  /*
> - * This function makes sure the current process has its own signal table,
> - * so that flush_signal_handlers can later reset the handlers without
> - * disturbing other processes.  (Other processes might share the signal
> - * table via the CLONE_SIGHAND option to clone().)
> + * This function makes sure the current process has no other threads and
> + * has a private signal table so that flush_signal_handlers() can reset
> + * the handlers without disturbing other processes which might share the
> + * signal table via the CLONE_SIGHAND option to clone().
>   */
> -static int de_thread(struct task_struct *tsk)
> +int de_thread(struct task_struct *tsk)
>  {
>  	struct signal_struct *sig = tsk->signal;
>  	struct sighand_struct *oldsighand = tsk->sighand;
> @@ -1051,60 +1100,24 @@ static int de_thread(struct task_struct *tsk)
>  	if (thread_group_empty(tsk))
>  		goto no_thread_group;
>  
> -	/*
> -	 * Kill all other threads in the thread group.
> -	 */
>  	spin_lock_irq(lock);
> -	if (signal_group_exit(sig)) {
> -		/*
> -		 * Another group action in progress, just
> -		 * return so that the signal is processed.
> -		 */
> -		spin_unlock_irq(lock);
> -		return -EAGAIN;
> -	}
> -
> -	sig->group_exit_task = tsk;
> -	sig->notify_count = zap_other_threads(tsk);
> +	sig->notify_count = sig->nr_threads;


maybe nr_threads - 1 since nr_threads includes us ?

+	sig->notify_count = sig->nr_threads - 1;



>  	if (!thread_group_leader(tsk))
>  		sig->notify_count--;
> -
> -	while (sig->notify_count) {
> -		__set_current_state(TASK_KILLABLE);
> -		spin_unlock_irq(lock);
> -		schedule();
> -		if (unlikely(__fatal_signal_pending(tsk)))
> -			goto killed;
> -		spin_lock_irq(lock);
> -	}
>  	spin_unlock_irq(lock);
>  
> +	if (wait_for_notify_count(tsk, sig))
> +		return -EINTR;
> +
>  	/*
>  	 * At this point all other threads have exited, all we have to
> -	 * do is to wait for the thread group leader to become inactive,
> -	 * and to assume its PID:
> +	 * do is to reap the old leader and assume its PID.
>  	 */
>  	if (!thread_group_leader(tsk)) {
>  		struct task_struct *leader = tsk->group_leader;
>  
> -		for (;;) {
> -			threadgroup_change_begin(tsk);
> -			write_lock_irq(&tasklist_lock);
> -			/*
> -			 * Do this under tasklist_lock to ensure that
> -			 * exit_notify() can't miss ->group_exit_task
> -			 */
> -			sig->notify_count = -1;
> -			if (likely(leader->exit_state))
> -				break;
> -			__set_current_state(TASK_KILLABLE);
> -			write_unlock_irq(&tasklist_lock);
> -			threadgroup_change_end(tsk);
> -			schedule();
> -			if (unlikely(__fatal_signal_pending(tsk)))
> -				goto killed;
> -		}
> -
> +		threadgroup_change_begin(tsk);
> +		write_lock_irq(&tasklist_lock);
>  		/*
>  		 * The only record we have of the real-time age of a
>  		 * process, regardless of execs it's done, is start_time.
> @@ -1162,10 +1175,9 @@ static int de_thread(struct task_struct *tsk)
>  		release_task(leader);
>  	}
>  
> +no_thread_group:
>  	sig->group_exit_task = NULL;
>  	sig->notify_count = 0;
> -
> -no_thread_group:
>  	/* we have changed execution domain */
>  	tsk->exit_signal = SIGCHLD;
>  
> @@ -1198,15 +1210,8 @@ static int de_thread(struct task_struct *tsk)
>  	}
>  
>  	BUG_ON(!thread_group_leader(tsk));
> +	flush_signal_handlers(current, 0);
>  	return 0;
> -
> -killed:
> -	/* protects against exit_notify() and __exit_signal() */
> -	read_lock(&tasklist_lock);
> -	sig->group_exit_task = NULL;
> -	sig->notify_count = 0;
> -	read_unlock(&tasklist_lock);
> -	return -EAGAIN;
>  }
>  
>  char *get_task_comm(char *buf, struct task_struct *tsk)
> @@ -1237,11 +1242,7 @@ int flush_old_exec(struct linux_binprm * bprm)
>  {
>  	int retval;
>  
> -	/*
> -	 * Make sure we have a private signal table and that
> -	 * we are unassociated from the previous thread group.
> -	 */
> -	retval = de_thread(current);
> +	retval = kill_sub_threads(current);
>  	if (retval)
>  		goto out;
>  
> @@ -1336,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm)
>  	/* An exec changes our domain. We are no longer part of the thread
>  	   group */
>  	current->self_exec_id++;
> -	flush_signal_handlers(current, 0);
>  }
>  EXPORT_SYMBOL(setup_new_exec);
>  
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 1303b57..06a5a7b 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -101,6 +101,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
>  extern int search_binary_handler(struct linux_binprm *);
>  extern int flush_old_exec(struct linux_binprm * bprm);
>  extern void setup_new_exec(struct linux_binprm * bprm);
> +extern int de_thread(struct task_struct *tsk);
>  extern void would_dump(struct linux_binprm *, struct file *);
>  
>  extern int suid_dumpable;
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 8f14b86..169d9f2 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -699,8 +699,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
>  	if (tsk->exit_state == EXIT_DEAD)
>  		list_add(&tsk->ptrace_entry, &dead);
>  
> -	/* mt-exec, de_thread() is waiting for group leader */
> -	if (unlikely(tsk->signal->notify_count < 0))
> +	/* mt-exec, kill_sub_threads() is waiting for group exit */
> +	if (unlikely(tsk->signal->notify_count < 0) &&
> +	    !++tsk->signal->notify_count)
>  		wake_up_process(tsk->signal->group_exit_task);
>  	write_unlock_irq(&tasklist_lock);
>  
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 3603d93..b78ce63 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -1200,13 +1200,12 @@ int zap_other_threads(struct task_struct *p)
>  
>  	while_each_thread(p, t) {
>  		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
> -		count++;
> -
>  		/* Don't bother with already dead threads */
>  		if (t->exit_state)
>  			continue;
>  		sigaddset(&t->pending.signal, SIGKILL);
>  		signal_wake_up(t, 1);
> +		count++;
>  	}
>  
>  	return count;

  parent reply	other threads:[~2017-02-13 17:28 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-13 14:14 [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-02-13 14:15 ` [PATCH 1/2] exec: don't wait for zombie threads with cred_guard_mutex held Oleg Nesterov
2017-02-13 16:12   ` kbuild test robot
2017-02-13 16:47     ` Oleg Nesterov
2017-02-13 16:39   ` kbuild test robot
2017-02-13 17:27   ` Mika Penttilä [this message]
2017-02-13 18:01     ` Oleg Nesterov
2017-02-13 18:04   ` [PATCH V2 " Oleg Nesterov
2017-02-16 11:42     ` Eric W. Biederman
2017-02-20 15:22       ` Oleg Nesterov
2017-02-20 15:36         ` Oleg Nesterov
2017-02-20 22:30         ` Eric W. Biederman
2017-02-21 17:53           ` Oleg Nesterov
2017-02-21 20:20             ` Eric W. Biederman
2017-02-22 17:41               ` Oleg Nesterov
2017-02-17  4:42     ` Eric W. Biederman
2017-02-20 15:50       ` Oleg Nesterov
2017-02-13 14:15 ` [PATCH 2/2] ptrace: ensure PTRACE_EVENT_EXIT won't stop if the tracee is killed by exec Oleg Nesterov
2017-02-24 16:03 ` [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-03-03  1:05   ` Eric W. Biederman
2017-03-03 17:33     ` Oleg Nesterov
2017-03-03 18:23       ` Eric W. Biederman
2017-03-03 18:23         ` Eric W. Biederman
2017-03-03 18:59         ` Eric W. Biederman
2017-03-03 18:59           ` Eric W. Biederman
2017-03-03 20:06           ` Eric W. Biederman
2017-03-03 20:06             ` Eric W. Biederman
2017-03-03 20:11             ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Eric W. Biederman
2017-03-03 20:11               ` Eric W. Biederman
2017-03-04 17:03               ` Oleg Nesterov
2017-03-30  8:07                 ` Eric W. Biederman
2017-04-01  5:11                   ` [RFC][PATCH 0/2] exec: Fixing ptrace'd mulit-threaded hang Eric W. Biederman
2017-04-01  5:11                     ` Eric W. Biederman
2017-04-01  5:14                     ` [RFC][PATCH 1/2] sighand: Count each thread group once in sighand_struct Eric W. Biederman
2017-04-01  5:14                       ` Eric W. Biederman
2017-04-01  5:16                     ` [RFC][PATCH 2/2] exec: If possible don't wait for ptraced threads to be reaped Eric W. Biederman
2017-04-01  5:16                       ` Eric W. Biederman
2017-04-02 15:35                       ` Oleg Nesterov
2017-04-02 15:35                         ` Oleg Nesterov
2017-04-02 18:53                         ` Eric W. Biederman
2017-04-02 18:53                           ` Eric W. Biederman
2017-04-03 18:12                           ` Oleg Nesterov
2017-04-03 18:12                             ` Oleg Nesterov
2017-04-03 21:04                             ` Eric W. Biederman
2017-04-05 16:44                               ` Oleg Nesterov
2017-04-02 15:38                     ` [RFC][PATCH 0/2] exec: Fixing ptrace'd mulit-threaded hang Oleg Nesterov
2017-04-02 15:38                       ` Oleg Nesterov
2017-04-02 22:50                     ` [RFC][PATCH v2 0/5] " Eric W. Biederman
2017-04-02 22:50                       ` Eric W. Biederman
2017-04-02 22:51                       ` [RFC][PATCH v2 1/5] ptrace: Don't wait in PTRACE_O_TRACEEXIT for exec or coredump Eric W. Biederman
2017-04-02 22:51                         ` Eric W. Biederman
2017-04-05 16:19                         ` Oleg Nesterov
2017-04-02 22:51                       ` [RFC][PATCH v2 2/5] sighand: Count each thread group once in sighand_struct Eric W. Biederman
2017-04-02 22:51                         ` Eric W. Biederman
2017-04-02 22:52                       ` [RFC][PATCH v2 3/5] clone: Disallown CLONE_THREAD with a shared sighand_struct Eric W. Biederman
2017-04-02 22:52                         ` Eric W. Biederman
2017-04-05 16:24                         ` Oleg Nesterov
2017-04-05 16:24                           ` Oleg Nesterov
2017-04-05 17:34                           ` Eric W. Biederman
2017-04-05 18:11                             ` Oleg Nesterov
2017-04-02 22:53                       ` [RFC][PATCH v2 4/5] exec: If possible don't wait for ptraced threads to be reaped Eric W. Biederman
2017-04-02 22:53                         ` Eric W. Biederman
2017-04-05 16:15                         ` Oleg Nesterov
2017-04-02 22:57                       ` [RFC][PATCH v2 5/5] signal: Don't allow accessing signal_struct by old threads after exec Eric W. Biederman
2017-04-02 22:57                         ` Eric W. Biederman
2017-04-05 16:18                         ` Oleg Nesterov
2017-04-05 16:18                           ` Oleg Nesterov
2017-04-05 18:16                           ` Eric W. Biederman
2017-04-05 18:16                             ` Eric W. Biederman
2017-04-06 15:48                             ` Oleg Nesterov
2017-04-06 15:48                               ` Oleg Nesterov
2017-04-02 16:15                   ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Oleg Nesterov
2017-04-02 16:15                     ` Oleg Nesterov
2017-04-02 21:07                     ` Eric W. Biederman
2017-04-02 21:07                       ` Eric W. Biederman
2017-04-03 18:37                       ` Oleg Nesterov
2017-04-03 18:37                         ` Oleg Nesterov
2017-04-03 22:49                         ` Eric W. Biederman
2017-04-03 22:49                           ` Eric W. Biederman
2017-04-03 22:49                         ` scope of cred_guard_mutex Eric W. Biederman
2017-04-03 22:49                           ` Eric W. Biederman
2017-04-05 16:08                           ` Oleg Nesterov
2017-04-05 16:11                             ` Kees Cook
2017-04-05 17:53                             ` Eric W. Biederman
2017-04-05 18:15                               ` Oleg Nesterov
2017-04-06 15:55                           ` Oleg Nesterov
2017-04-06 15:55                             ` Oleg Nesterov
2017-04-07 22:07                             ` Kees Cook
2017-04-07 22:07                               ` Kees Cook
2017-09-04  3:19                       ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Robert O'Callahan
2017-09-04  3:19                         ` Robert O'Callahan
2017-03-04 16:54         ` [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-03-04 16:54           ` Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ce94707-7bbe-a0d5-85a0-b93e73d76e22@nextfour.com \
    --to=mika.penttila@nextfour.com \
    --cc=afazekas@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=asarai@suse.com \
    --cc=ebiederm@xmission.com \
    --cc=jann@thejh.net \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mhocko@kernel.org \
    --cc=oleg@redhat.com \
    --cc=uobergfe@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.