All of lore.kernel.org
 help / color / mirror / Atom feed
* + pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch added to -mm tree
@ 2012-05-08 22:51 akpm
  0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2012-05-08 22:51 UTC (permalink / raw)
  To: mm-commits; +Cc: ebiederm, efault, gorcunov, louis.rilling, oleg, xemul


The patch titled
     Subject: pidns: guarantee that the pidns init will be the last pidns process reaped
has been added to the -mm tree.  Its filename is
     pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Eric W. Biederman <ebiederm@xmission.com>
Subject: pidns: guarantee that the pidns init will be the last pidns process reaped

This change extends the thread group zombie leader logic to work for pid
namespaces.  The task with pid 1 is declared the pid namespace leader.  A
pid namespace with no more processes is detected by observing that the
init task is a zombie in an empty thread group, and the the init task has
no children.

Instead of moving lingering EXIT_DEAD tasks off of init's ->children list
we now block init from exiting until those children have self reaped and
have removed themselves.  Which guarantees that the init task is the last
task in a pid namespace to be reaped.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Louis Rilling <louis.rilling@kerlabs.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 kernel/exit.c |   46 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff -puN kernel/exit.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped kernel/exit.c
--- a/kernel/exit.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped
+++ a/kernel/exit.c
@@ -164,6 +164,16 @@ static void delayed_put_task_struct(stru
 	put_task_struct(tsk);
 }
 
+static bool pidns_leader(struct task_struct *tsk)
+{
+	return is_child_reaper(task_pid(tsk));
+}
+
+static bool delay_pidns_leader(struct task_struct *tsk)
+{
+	return pidns_leader(tsk) &&
+	       (!thread_group_empty(tsk) || !list_empty(&tsk->children));
+}
 
 void release_task(struct task_struct * p)
 {
@@ -183,15 +193,23 @@ repeat:
 	__exit_signal(p);
 
 	/*
-	 * If we are the last non-leader member of the thread
-	 * group, and the leader is zombie, then notify the
-	 * group leader's parent process. (if it wants notification.)
+	 * If we are the last non-leader member of the thread group,
+	 * or the last non-leader member of the pid namespace, and the
+	 * leader is zombie, then notify the leader's parent
+	 * process. (if it wants notification.)
 	 */
 	zap_leader = 0;
-	leader = p->group_leader;
-	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
+	leader = NULL;
+	/* Do we need to worry about our thread_group or our pidns leader? */
+	if (p != p->group_leader)
+		leader = p->group_leader;
+	else if (pidns_leader(p->real_parent))
+		leader = p->real_parent;
+
+	if (leader && thread_group_empty(leader) &&
+	    leader->exit_state == EXIT_ZOMBIE && list_empty(&leader->children)) {
 		/*
-		 * If we were the last child thread and the leader has
+		 * If we were the last task in the group and the leader has
 		 * exited already, and the leader's parent ignores SIGCHLD,
 		 * then we are the one who should release the leader.
 		 */
@@ -720,11 +738,10 @@ static struct task_struct *find_new_reap
 		zap_pid_ns_processes(pid_ns);
 		write_lock_irq(&tasklist_lock);
 		/*
-		 * We can not clear ->child_reaper or leave it alone.
-		 * There may by stealth EXIT_DEAD tasks on ->children,
-		 * forget_original_parent() must move them somewhere.
+		 * Move all lingering EXIT_DEAD tasks onto the
+		 * children list of init's thread group leader.
 		 */
-		pid_ns->child_reaper = init_pid_ns.child_reaper;
+		pid_ns->child_reaper = father->group_leader;
 	} else if (father->signal->has_child_subreaper) {
 		struct task_struct *reaper;
 
@@ -798,6 +815,12 @@ static void forget_original_parent(struc
 	exit_ptrace(father);
 	reaper = find_new_reaper(father);
 
+	/* Return immediately if we aren't going to reparent anything */
+	if (unlikely(reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		return;
+	}
+
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		struct task_struct *t = p;
 		do {
@@ -853,6 +876,7 @@ static void exit_notify(struct task_stru
 		autoreap = do_notify_parent(tsk, sig);
 	} else if (thread_group_leader(tsk)) {
 		autoreap = thread_group_empty(tsk) &&
+			!delay_pidns_leader(tsk) &&
 			do_notify_parent(tsk, tsk->exit_signal);
 	} else {
 		autoreap = true;
@@ -1583,7 +1607,7 @@ static int wait_consider_task(struct wai
 		}
 
 		/* we don't reap group leaders with subthreads */
-		if (!delay_group_leader(p))
+		if (!delay_group_leader(p) && !delay_pidns_leader(p))
 			return wait_task_zombie(wo, p);
 
 		/*
_
Subject: Subject: pidns: guarantee that the pidns init will be the last pidns process reaped

Patches currently in -mm which might be from ebiederm@xmission.com are

linux-next.patch
namespaces-fix-leak-on-fork-failure.patch
connector-userns-replace-netlink-uses-of-cap_raised-with-capable.patch
cred-remove-task_is_dead-from-__task_cred-validation.patch
pidns-use-task_active_pid_ns-in-do_notify_parent.patch
pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch
pidns-make-killed-children-autoreap.patch
syscalls-x86-add-__nr_kcmp-syscall-v8.patch
c-r-procfs-add-arg_start-end-env_start-end-and-exit_code-members-to-proc-pid-stat.patch


^ permalink raw reply	[flat|nested] 2+ messages in thread

* + pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch added to -mm tree
@ 2012-06-13 21:53 akpm
  0 siblings, 0 replies; 2+ messages in thread
From: akpm @ 2012-06-13 21:53 UTC (permalink / raw)
  To: mm-commits; +Cc: ebiederm, efault, louis.rilling, oleg, xemul


The patch titled
     Subject: pidns: guarantee that the pidns init will be the last pidns process reaped
has been added to the -mm tree.  Its filename is
     pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Eric W. Biederman <ebiederm@xmission.com>
Subject: pidns: guarantee that the pidns init will be the last pidns process reaped

Today we have a twofold bug.  Sometimes release_task on pid == 1 in a pid
namespace can run before other processes in a pid namespace have had
release task called.  With the result that pid_ns_release_proc can be
called before the last proc_flus_task() is done using upid->ns->proc_mnt,
resulting in the use of a stale pointer.  This same set of circumstances
can lead to waitpid(...) returning for a processes started with
clone(CLONE_NEWPID) before the every process in the pid namespace has
actually exited.

To fix this modify zap_pid_ns_processess wait until all other processes in
the pid namespace have exited, even EXIT_DEAD zombies.

The delay_group_leader and related tests ensure that the thread gruop
leader will be the last thread of a process group to be reaped, or to
become EXIT_DEAD and self reap.  With the change to zap_pid_ns_processes
we get the guarantee that pid == 1 in a pid namespace will be the last
task that release_task is called on.

With pid == 1 being the last task to pass through release_task
pid_ns_release_proc can no longer be called too early nor can wait return
before all of the EXIT_DEAD tasks in a pid namespace have exited.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Louis Rilling <louis.rilling@kerlabs.com>
Cc: Mike Galbraith <efault@gmx.de>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 kernel/exit.c          |   14 +++++++++++++-
 kernel/pid_namespace.c |   20 ++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff -puN kernel/exit.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped kernel/exit.c
--- a/kernel/exit.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped
+++ a/kernel/exit.c
@@ -64,7 +64,6 @@ static void exit_mm(struct task_struct *
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
-	detach_pid(p, PIDTYPE_PID);
 	if (group_dead) {
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
@@ -72,7 +71,20 @@ static void __unhash_process(struct task
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
 		__this_cpu_dec(process_counts);
+		/*
+		 * If we are the last child process in a pid namespace to be
+		 * reaped, notify the reaper sleeping zap_pid_ns_processes().
+		 */
+		if (IS_ENABLED(CONFIG_PID_NS)) {
+			struct task_struct *parent = p->real_parent;
+
+			if ((task_active_pid_ns(p)->child_reaper == parent) &&
+			    list_empty(&parent->children) &&
+			    (parent->flags & PF_EXITING))
+				wake_up_process(parent);
+		}
 	}
+	detach_pid(p, PIDTYPE_PID);
 	list_del_rcu(&p->thread_group);
 }
 
diff -puN kernel/pid_namespace.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped kernel/pid_namespace.c
--- a/kernel/pid_namespace.c~pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped
+++ a/kernel/pid_namespace.c
@@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_nam
 	}
 	read_unlock(&tasklist_lock);
 
+	/* Firstly reap the EXIT_ZOMBIE children we may have. */
 	do {
 		clear_thread_flag(TIF_SIGPENDING);
 		rc = sys_wait4(-1, NULL, __WALL, NULL);
 	} while (rc != -ECHILD);
 
+	/*
+	 * sys_wait4() above can't reap the TASK_DEAD children.
+	 * Make sure they all go away, see __unhash_process().
+	 */
+	for (;;) {
+		bool need_wait = false;
+
+		read_lock(&tasklist_lock);
+		if (!list_empty(&current->children)) {
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			need_wait = true;
+		}
+		read_unlock(&tasklist_lock);
+
+		if (!need_wait)
+			break;
+		schedule();
+	}
+
 	if (pid_ns->reboot)
 		current->signal->group_exit_code = pid_ns->reboot;
 
_
Subject: Subject: pidns: guarantee that the pidns init will be the last pidns process reaped

Patches currently in -mm which might be from ebiederm@xmission.com are

linux-next.patch
pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch
pidns-find_new_reaper-can-no-longer-switch-to-init_pid_nschild_reaper.patch
c-r-fcntl-add-f_getowner_uids-option.patch


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-06-13 21:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-08 22:51 + pidns-guarantee-that-the-pidns-init-will-be-the-last-pidns-process-reaped.patch added to -mm tree akpm
2012-06-13 21:53 akpm

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.