All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Aleksa Sarai <asarai@suse.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Attila Fazekas <afazekas@redhat.com>, Jann Horn <jann@thejh.net>,
	Kees Cook <keescook@chromium.org>,
	Michal Hocko <mhocko@kernel.org>,
	Ulrich Obergfell <uobergfe@redhat.com>,
	linux-kernel@vger.kernel.org, linux-api@vger.kernel.org
Subject: [RFC][PATCH v2 4/5] exec: If possible don't wait for ptraced threads to be reaped
Date: Sun, 02 Apr 2017 17:53:52 -0500	[thread overview]
Message-ID: <87efxa8lq7.fsf_-_@xmission.com> (raw)
In-Reply-To: <874ly6a0h1.fsf_-_@xmission.com> (Eric W. Biederman's message of "Sun, 02 Apr 2017 17:50:02 -0500")


Take advantage of the situation when sighand->count == 1 to only wait
for threads to reach EXIT_ZOMBIE instead of EXIT_DEAD in de_thread.
Only old old linux threading libraries use CLONE_SIGHAND without
CLONE_THREAD.  So this situation should be present most of the time.

This allows ptracing through a multi-threaded exec without the danger
of stalling the exec.  As historically exec waits for the other
threads to be reaped in de_thread before completing.  This is
necessary as it is not safe to unshare the sighand_struct until all of
the other threads in this thread group are reaped, because the lock to
serialize threads in a thread group siglock lives in sighand_struct.

When oldsighand->count == 1 we know that there are no other
users and unsharing the sighand struct in exec is pointless.
This makes it safe to only wait for threads to become zombies
as the siglock won't change during exec and release_task
will use the samve siglock for the old threads as for
the new threads.

Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/exec.c       | 22 ++--------------------
 kernel/exit.c   | 18 ++++++++----------
 kernel/signal.c |  2 +-
 3 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 65145a3df065..303a114b00ce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,9 +1071,6 @@ static int de_thread(struct task_struct *tsk)
 
 	sig->group_exit_task = tsk;
 	sig->notify_count = zap_other_threads(tsk);
-	if (!thread_group_leader(tsk))
-		sig->notify_count--;
-
 	while (sig->notify_count) {
 		__set_current_state(TASK_KILLABLE);
 		spin_unlock_irq(lock);
@@ -1092,23 +1089,8 @@ static int de_thread(struct task_struct *tsk)
 	if (!thread_group_leader(tsk)) {
 		struct task_struct *leader = tsk->group_leader;
 
-		for (;;) {
-			cgroup_threadgroup_change_begin(tsk);
-			write_lock_irq(&tasklist_lock);
-			/*
-			 * Do this under tasklist_lock to ensure that
-			 * exit_notify() can't miss ->group_exit_task
-			 */
-			sig->notify_count = -1;
-			if (likely(leader->exit_state))
-				break;
-			__set_current_state(TASK_KILLABLE);
-			write_unlock_irq(&tasklist_lock);
-			cgroup_threadgroup_change_end(tsk);
-			schedule();
-			if (unlikely(__fatal_signal_pending(tsk)))
-				goto killed;
-		}
+		cgroup_threadgroup_change_begin(tsk);
+		write_lock_irq(&tasklist_lock);
 
 		/*
 		 * The only record we have of the real-time age of a
diff --git a/kernel/exit.c b/kernel/exit.c
index 8c5b3e106298..955c96e3fc12 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,13 +118,6 @@ static void __exit_signal(struct task_struct *tsk)
 		tty = sig->tty;
 		sig->tty = NULL;
 	} else {
-		/*
-		 * If there is any task waiting for the group exit
-		 * then notify it:
-		 */
-		if (sig->notify_count > 0 && !--sig->notify_count)
-			wake_up_process(sig->group_exit_task);
-
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 	}
@@ -712,6 +705,8 @@ static void forget_original_parent(struct task_struct *father,
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
+	struct sighand_struct *sighand = tsk->sighand;
+	struct signal_struct *signal = tsk->signal;
 	bool autoreap;
 	struct task_struct *p, *n;
 	LIST_HEAD(dead);
@@ -739,9 +734,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	if (tsk->exit_state == EXIT_DEAD)
 		list_add(&tsk->ptrace_entry, &dead);
 
-	/* mt-exec, de_thread() is waiting for group leader */
-	if (unlikely(tsk->signal->notify_count < 0))
-		wake_up_process(tsk->signal->group_exit_task);
+	spin_lock(&sighand->siglock);
+	/* mt-exec, de_thread is waiting for threads to exit */
+	if (signal->notify_count > 0 && !--signal->notify_count)
+		wake_up_process(signal->group_exit_task);
+
+	spin_unlock(&sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 11fa736eb2ae..fd75ba33ee3d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1205,13 +1205,13 @@ int zap_other_threads(struct task_struct *p)
 
 	while_each_thread(p, t) {
 		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		count++;
 
 		/* Don't bother with already dead threads */
 		if (t->exit_state)
 			continue;
 		sigaddset(&t->pending.signal, SIGKILL);
 		signal_wake_up(t, 1);
+		count++;
 	}
 
 	return count;
-- 
2.10.1

WARNING: multiple messages have this Message-ID (diff)
From: ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman)
To: Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>,
	Aleksa Sarai <asarai-IBi9RG/b67k@public.gmane.org>,
	Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org>,
	Attila Fazekas <afazekas-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Jann Horn <jann-XZ1E9jl8jIdeoWH0uzbU5w@public.gmane.org>,
	Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>,
	Michal Hocko <mhocko-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Ulrich Obergfell
	<uobergfe-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [RFC][PATCH v2 4/5] exec: If possible don't wait for ptraced threads to be reaped
Date: Sun, 02 Apr 2017 17:53:52 -0500	[thread overview]
Message-ID: <87efxa8lq7.fsf_-_@xmission.com> (raw)
In-Reply-To: <874ly6a0h1.fsf_-_-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> (Eric W. Biederman's message of "Sun, 02 Apr 2017 17:50:02 -0500")


Take advantage of the situation when sighand->count == 1 to only wait
for threads to reach EXIT_ZOMBIE instead of EXIT_DEAD in de_thread.
Only old old linux threading libraries use CLONE_SIGHAND without
CLONE_THREAD.  So this situation should be present most of the time.

This allows ptracing through a multi-threaded exec without the danger
of stalling the exec.  As historically exec waits for the other
threads to be reaped in de_thread before completing.  This is
necessary as it is not safe to unshare the sighand_struct until all of
the other threads in this thread group are reaped, because the lock to
serialize threads in a thread group siglock lives in sighand_struct.

When oldsighand->count == 1 we know that there are no other
users and unsharing the sighand struct in exec is pointless.
This makes it safe to only wait for threads to become zombies
as the siglock won't change during exec and release_task
will use the samve siglock for the old threads as for
the new threads.

Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/exec.c       | 22 ++--------------------
 kernel/exit.c   | 18 ++++++++----------
 kernel/signal.c |  2 +-
 3 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 65145a3df065..303a114b00ce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,9 +1071,6 @@ static int de_thread(struct task_struct *tsk)
 
 	sig->group_exit_task = tsk;
 	sig->notify_count = zap_other_threads(tsk);
-	if (!thread_group_leader(tsk))
-		sig->notify_count--;
-
 	while (sig->notify_count) {
 		__set_current_state(TASK_KILLABLE);
 		spin_unlock_irq(lock);
@@ -1092,23 +1089,8 @@ static int de_thread(struct task_struct *tsk)
 	if (!thread_group_leader(tsk)) {
 		struct task_struct *leader = tsk->group_leader;
 
-		for (;;) {
-			cgroup_threadgroup_change_begin(tsk);
-			write_lock_irq(&tasklist_lock);
-			/*
-			 * Do this under tasklist_lock to ensure that
-			 * exit_notify() can't miss ->group_exit_task
-			 */
-			sig->notify_count = -1;
-			if (likely(leader->exit_state))
-				break;
-			__set_current_state(TASK_KILLABLE);
-			write_unlock_irq(&tasklist_lock);
-			cgroup_threadgroup_change_end(tsk);
-			schedule();
-			if (unlikely(__fatal_signal_pending(tsk)))
-				goto killed;
-		}
+		cgroup_threadgroup_change_begin(tsk);
+		write_lock_irq(&tasklist_lock);
 
 		/*
 		 * The only record we have of the real-time age of a
diff --git a/kernel/exit.c b/kernel/exit.c
index 8c5b3e106298..955c96e3fc12 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,13 +118,6 @@ static void __exit_signal(struct task_struct *tsk)
 		tty = sig->tty;
 		sig->tty = NULL;
 	} else {
-		/*
-		 * If there is any task waiting for the group exit
-		 * then notify it:
-		 */
-		if (sig->notify_count > 0 && !--sig->notify_count)
-			wake_up_process(sig->group_exit_task);
-
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 	}
@@ -712,6 +705,8 @@ static void forget_original_parent(struct task_struct *father,
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
+	struct sighand_struct *sighand = tsk->sighand;
+	struct signal_struct *signal = tsk->signal;
 	bool autoreap;
 	struct task_struct *p, *n;
 	LIST_HEAD(dead);
@@ -739,9 +734,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	if (tsk->exit_state == EXIT_DEAD)
 		list_add(&tsk->ptrace_entry, &dead);
 
-	/* mt-exec, de_thread() is waiting for group leader */
-	if (unlikely(tsk->signal->notify_count < 0))
-		wake_up_process(tsk->signal->group_exit_task);
+	spin_lock(&sighand->siglock);
+	/* mt-exec, de_thread is waiting for threads to exit */
+	if (signal->notify_count > 0 && !--signal->notify_count)
+		wake_up_process(signal->group_exit_task);
+
+	spin_unlock(&sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 11fa736eb2ae..fd75ba33ee3d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1205,13 +1205,13 @@ int zap_other_threads(struct task_struct *p)
 
 	while_each_thread(p, t) {
 		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		count++;
 
 		/* Don't bother with already dead threads */
 		if (t->exit_state)
 			continue;
 		sigaddset(&t->pending.signal, SIGKILL);
 		signal_wake_up(t, 1);
+		count++;
 	}
 
 	return count;
-- 
2.10.1

  parent reply	other threads:[~2017-04-02 22:59 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-13 14:14 [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-02-13 14:15 ` [PATCH 1/2] exec: don't wait for zombie threads with cred_guard_mutex held Oleg Nesterov
2017-02-13 16:12   ` kbuild test robot
2017-02-13 16:47     ` Oleg Nesterov
2017-02-13 16:39   ` kbuild test robot
2017-02-13 17:27   ` Mika Penttilä
2017-02-13 18:01     ` Oleg Nesterov
2017-02-13 18:04   ` [PATCH V2 " Oleg Nesterov
2017-02-16 11:42     ` Eric W. Biederman
2017-02-20 15:22       ` Oleg Nesterov
2017-02-20 15:36         ` Oleg Nesterov
2017-02-20 22:30         ` Eric W. Biederman
2017-02-21 17:53           ` Oleg Nesterov
2017-02-21 20:20             ` Eric W. Biederman
2017-02-22 17:41               ` Oleg Nesterov
2017-02-17  4:42     ` Eric W. Biederman
2017-02-20 15:50       ` Oleg Nesterov
2017-02-13 14:15 ` [PATCH 2/2] ptrace: ensure PTRACE_EVENT_EXIT won't stop if the tracee is killed by exec Oleg Nesterov
2017-02-24 16:03 ` [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-03-03  1:05   ` Eric W. Biederman
2017-03-03 17:33     ` Oleg Nesterov
2017-03-03 18:23       ` Eric W. Biederman
2017-03-03 18:23         ` Eric W. Biederman
2017-03-03 18:59         ` Eric W. Biederman
2017-03-03 18:59           ` Eric W. Biederman
2017-03-03 20:06           ` Eric W. Biederman
2017-03-03 20:06             ` Eric W. Biederman
2017-03-03 20:11             ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Eric W. Biederman
2017-03-03 20:11               ` Eric W. Biederman
2017-03-04 17:03               ` Oleg Nesterov
2017-03-30  8:07                 ` Eric W. Biederman
2017-04-01  5:11                   ` [RFC][PATCH 0/2] exec: Fixing ptrace'd mulit-threaded hang Eric W. Biederman
2017-04-01  5:11                     ` Eric W. Biederman
2017-04-01  5:14                     ` [RFC][PATCH 1/2] sighand: Count each thread group once in sighand_struct Eric W. Biederman
2017-04-01  5:14                       ` Eric W. Biederman
2017-04-01  5:16                     ` [RFC][PATCH 2/2] exec: If possible don't wait for ptraced threads to be reaped Eric W. Biederman
2017-04-01  5:16                       ` Eric W. Biederman
2017-04-02 15:35                       ` Oleg Nesterov
2017-04-02 15:35                         ` Oleg Nesterov
2017-04-02 18:53                         ` Eric W. Biederman
2017-04-02 18:53                           ` Eric W. Biederman
2017-04-03 18:12                           ` Oleg Nesterov
2017-04-03 18:12                             ` Oleg Nesterov
2017-04-03 21:04                             ` Eric W. Biederman
2017-04-05 16:44                               ` Oleg Nesterov
2017-04-02 15:38                     ` [RFC][PATCH 0/2] exec: Fixing ptrace'd mulit-threaded hang Oleg Nesterov
2017-04-02 15:38                       ` Oleg Nesterov
2017-04-02 22:50                     ` [RFC][PATCH v2 0/5] " Eric W. Biederman
2017-04-02 22:50                       ` Eric W. Biederman
2017-04-02 22:51                       ` [RFC][PATCH v2 1/5] ptrace: Don't wait in PTRACE_O_TRACEEXIT for exec or coredump Eric W. Biederman
2017-04-02 22:51                         ` Eric W. Biederman
2017-04-05 16:19                         ` Oleg Nesterov
2017-04-02 22:51                       ` [RFC][PATCH v2 2/5] sighand: Count each thread group once in sighand_struct Eric W. Biederman
2017-04-02 22:51                         ` Eric W. Biederman
2017-04-02 22:52                       ` [RFC][PATCH v2 3/5] clone: Disallown CLONE_THREAD with a shared sighand_struct Eric W. Biederman
2017-04-02 22:52                         ` Eric W. Biederman
2017-04-05 16:24                         ` Oleg Nesterov
2017-04-05 16:24                           ` Oleg Nesterov
2017-04-05 17:34                           ` Eric W. Biederman
2017-04-05 18:11                             ` Oleg Nesterov
2017-04-02 22:53                       ` Eric W. Biederman [this message]
2017-04-02 22:53                         ` [RFC][PATCH v2 4/5] exec: If possible don't wait for ptraced threads to be reaped Eric W. Biederman
2017-04-05 16:15                         ` Oleg Nesterov
2017-04-02 22:57                       ` [RFC][PATCH v2 5/5] signal: Don't allow accessing signal_struct by old threads after exec Eric W. Biederman
2017-04-02 22:57                         ` Eric W. Biederman
2017-04-05 16:18                         ` Oleg Nesterov
2017-04-05 16:18                           ` Oleg Nesterov
2017-04-05 18:16                           ` Eric W. Biederman
2017-04-05 18:16                             ` Eric W. Biederman
2017-04-06 15:48                             ` Oleg Nesterov
2017-04-06 15:48                               ` Oleg Nesterov
2017-04-02 16:15                   ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Oleg Nesterov
2017-04-02 16:15                     ` Oleg Nesterov
2017-04-02 21:07                     ` Eric W. Biederman
2017-04-02 21:07                       ` Eric W. Biederman
2017-04-03 18:37                       ` Oleg Nesterov
2017-04-03 18:37                         ` Oleg Nesterov
2017-04-03 22:49                         ` Eric W. Biederman
2017-04-03 22:49                           ` Eric W. Biederman
2017-04-03 22:49                         ` scope of cred_guard_mutex Eric W. Biederman
2017-04-03 22:49                           ` Eric W. Biederman
2017-04-05 16:08                           ` Oleg Nesterov
2017-04-05 16:11                             ` Kees Cook
2017-04-05 17:53                             ` Eric W. Biederman
2017-04-05 18:15                               ` Oleg Nesterov
2017-04-06 15:55                           ` Oleg Nesterov
2017-04-06 15:55                             ` Oleg Nesterov
2017-04-07 22:07                             ` Kees Cook
2017-04-07 22:07                               ` Kees Cook
2017-09-04  3:19                       ` [RFC][PATCH] exec: Don't wait for ptraced threads to be reaped Robert O'Callahan
2017-09-04  3:19                         ` Robert O'Callahan
2017-03-04 16:54         ` [PATCH 0/2] fix the traced mt-exec deadlock Oleg Nesterov
2017-03-04 16:54           ` Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87efxa8lq7.fsf_-_@xmission.com \
    --to=ebiederm@xmission.com \
    --cc=afazekas@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=asarai@suse.com \
    --cc=jann@thejh.net \
    --cc=keescook@chromium.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mhocko@kernel.org \
    --cc=oleg@redhat.com \
    --cc=uobergfe@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.