* + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
@ 2009-08-20 21:14 akpm
  2009-08-21 10:26 ` + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch " Oleg Nesterov
  0 siblings, 1 reply; 20+ messages in thread
From: akpm @ 2009-08-20 21:14 UTC (permalink / raw)
  To: mm-commits; +Cc: bblum, ebiederm, lizf, matthltc, menage, oleg


The patch titled
     cgroups: add functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
has been added to the -mm tree.  Its filename is
     cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this

The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/

------------------------------------------------------
Subject: cgroups: add functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
From: Ben Blum <bblum@google.com>

Add an rwsem that lives in a threadgroup's sighand_struct (next to the
sighand's atomic count, to piggyback on its cacheline), and two functions
in kernel/cgroup.c (for now) for easily+safely obtaining and releasing it.

If another part of the kernel later wants to use such a locking mechanism,
the CONFIG_CGROUPS ifdefs should be changed to a higher-up flag that
CGROUPS and the other system would both depend on, and the lock/unlock
functions could be moved to sched.c or so.
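
For context, a minimal sketch of how a threadgroup-wide operation would be
expected to use the new helpers (the caller shape here is illustrative, not
part of this patch; the real user is the follow-up cgroup.procs patch):

	struct sighand_struct *sighand;
	struct task_struct *t;

	sighand = threadgroup_fork_lock(leader);
	if (sighand) {	/* NULL: every thread in the group is already exiting */
		/* no CLONE_THREAD fork can complete while we hold this */
		list_for_each_entry(t, &leader->thread_group, thread_group) {
			/* ... operate on each of the other threads; the walk
			 * itself still needs suitable locking, see below ... */
		}
		threadgroup_fork_unlock(sighand); /* drops rwsem + sighand ref */
	}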

Signed-off-by: Ben Blum <bblum@google.com>
Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/cgroup.h    |   14 ++++-
 include/linux/init_task.h |    9 +++
 include/linux/sched.h     |   15 ++++++
 kernel/cgroup.c           |   87 +++++++++++++++++++++++++++++++++++-
 kernel/fork.c             |    9 ++-
 5 files changed, 125 insertions(+), 9 deletions(-)

diff -puN include/linux/cgroup.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup include/linux/cgroup.h
--- a/include/linux/cgroup.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup
+++ a/include/linux/cgroup.h
@@ -30,10 +30,12 @@ extern int cgroup_init(void);
 extern void cgroup_lock(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
-extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_fork_callbacks(struct task_struct *p);
-extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern void cgroup_fork_failed(struct task_struct *p, int run_callbacks,
+			       unsigned long clone_flags);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 
@@ -568,10 +570,14 @@ unsigned short css_depth(struct cgroup_s
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
-static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork(struct task_struct *p,
+			       unsigned long clone_flags) {}
 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
-static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p,
+				    unsigned long clone_flags) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+static inline void cgroup_fork_failed(struct task_struct *p, int callbacks,
+				      unsigned long clone_flags) {}
 
 static inline void cgroup_lock(void) {}
 static inline void cgroup_unlock(void) {}
diff -puN include/linux/init_task.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup include/linux/init_task.h
--- a/include/linux/init_task.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup
+++ a/include/linux/init_task.h
@@ -41,7 +41,16 @@ extern struct nsproxy init_nsproxy;
 	INIT_IPC_NS(ipc_ns)						\
 }
 
+#ifdef CONFIG_CGROUPS
+# define INIT_THREADGROUP_FORK_LOCK(sighand)				\
+	.threadgroup_fork_lock = 					\
+		__RWSEM_INITIALIZER(sighand.threadgroup_fork_lock),
+#else
+# define INIT_THREADGROUP_FORK_LOCK(sighand)
+#endif
+
 #define INIT_SIGHAND(sighand) {						\
+	INIT_THREADGROUP_FORK_LOCK(sighand)				\
 	.count		= ATOMIC_INIT(1), 				\
 	.action		= { { { .sa_handler = NULL, } }, },		\
 	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
diff -puN include/linux/sched.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup include/linux/sched.h
--- a/include/linux/sched.h~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup
+++ a/include/linux/sched.h
@@ -478,6 +478,21 @@ extern int get_dumpable(struct mm_struct
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
 struct sighand_struct {
+#ifdef CONFIG_CGROUPS
+	/*
+	 * The threadgroup_fork_lock is used to prevent any threads in a
+	 * threadgroup from forking with CLONE_THREAD while held for writing,
+	 * used for threadgroup-wide operations that are fork-sensitive. It
+	 * lives here next to sighand.count as a cacheline optimization.
+	 *
+	 * TODO: if anybody besides cgroups uses this lock, change the
+	 * CONFIG_CGROUPS to a higher-up CONFIG_* that the other user and
+	 * cgroups would both depend upon. Also, they'll want to move where
+	 * the readlock happens - it currently lives in kernel/cgroup.c in
+	 * cgroup_{fork,post_fork,fork_failed}().
+	 */
+	struct rw_semaphore	threadgroup_fork_lock;
+#endif
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
diff -puN kernel/cgroup.c~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup kernel/cgroup.c
--- a/kernel/cgroup.c~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup
+++ a/kernel/cgroup.c
@@ -1529,6 +1529,65 @@ int cgroup_path(const struct cgroup *cgr
 }
 
 /**
+ * threadgroup_fork_lock - block all CLONE_THREAD forks in the threadgroup
+ * @tsk: the task whose threadgroup should be locked
+ *
+ * Takes (for writing) the threadgroup_fork_lock rwsem in the threadgroup's
+ * sighand_struct, found by searching the threadgroup for a live thread.
+ * Returns the sighand_struct that should be given to threadgroup_fork_unlock,
+ * or NULL if all threads in the group are exiting and have cleared their
+ * sighand pointers.
+ */
+struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
+{
+	struct sighand_struct *sighand;
+	struct task_struct *p;
+
+	/* tasklist lock protects sighand_struct's disappearance in exit(). */
+	read_lock(&tasklist_lock);
+	if (likely(tsk->sighand)) {
+		/* simple case - check the thread we were given first */
+		sighand = tsk->sighand;
+	} else {
+		sighand = NULL;
+		/*
+		 * tsk is exiting; try to find another thread in the group
+		 * whose sighand pointer is still alive.
+		 */
+		rcu_read_lock();
+		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
+			if (p->sighand) {
+				sighand = p->sighand;
+				break;
+			}
+		}
+		rcu_read_unlock();
+	}
+	/* prevent sighand from vanishing before we let go of tasklist_lock */
+	if (likely(sighand))
+		atomic_inc(&sighand->count);
+
+	/* done searching. */
+	read_unlock(&tasklist_lock);
+
+	if (likely(sighand))
+		down_write(&sighand->threadgroup_fork_lock);
+	return sighand;
+}
+
+/**
+ * threadgroup_fork_unlock - let threadgroup resume CLONE_THREAD forks.
+ * @sighand: the threadgroup's sighand that threadgroup_fork_lock gave back
+ *
+ * Lets go of the threadgroup_fork_lock, and drops the sighand reference.
+ */
+void threadgroup_fork_unlock(struct sighand_struct *sighand)
+{
+	up_write(&sighand->threadgroup_fork_lock);
+	__cleanup_sighand(sighand);
+}
+
+/**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
  * @tsk: the task to be attached
@@ -3421,8 +3480,10 @@ static struct file_operations proc_cgrou
  * At the point that cgroup_fork() is called, 'current' is the parent
  * task, and the passed argument 'child' points to the child task.
  */
-void cgroup_fork(struct task_struct *child)
+void cgroup_fork(struct task_struct *child, unsigned long clone_flags)
 {
+	if (clone_flags & CLONE_THREAD)
+		down_read(&current->sighand->threadgroup_fork_lock);
 	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
@@ -3459,7 +3520,7 @@ void cgroup_fork_callbacks(struct task_s
  * with the first call to cgroup_iter_start() - to guarantee that the
  * new task ends up on its list.
  */
-void cgroup_post_fork(struct task_struct *child)
+void cgroup_post_fork(struct task_struct *child, unsigned long clone_flags)
 {
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
@@ -3469,6 +3530,8 @@ void cgroup_post_fork(struct task_struct
 		task_unlock(child);
 		write_unlock(&css_set_lock);
 	}
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->sighand->threadgroup_fork_lock);
 }
 /**
  * cgroup_exit - detach cgroup from exiting task
@@ -3540,6 +3603,26 @@ void cgroup_exit(struct task_struct *tsk
 }
 
 /**
+ * cgroup_fork_failed - undo operations for fork failure
+ * @tsk: pointer to task_struct of the failed child process
+ * @run_callbacks: run exit callbacks?
+ *
+ * Description: Undo cgroup operations after cgroup_fork in fork failure.
+ *
+ * We release the read lock that was taken in cgroup_fork(), since it is
+ * supposed to be dropped in cgroup_post_fork in the success case. The other
+ * thing that wants to be done is detaching the failed child task from the
+ * cgroup, so we wrap cgroup_exit.
+ */
+void cgroup_fork_failed(struct task_struct *tsk, int run_callbacks,
+			unsigned long clone_flags)
+{
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->sighand->threadgroup_fork_lock);
+	cgroup_exit(tsk, run_callbacks);
+}
+
+/**
  * cgroup_clone - clone the cgroup the given subsystem is attached to
  * @tsk: the task to be moved
  * @subsys: the given subsystem
diff -puN kernel/fork.c~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup kernel/fork.c
--- a/kernel/fork.c~cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup
+++ a/kernel/fork.c
@@ -792,6 +792,9 @@ static int copy_sighand(unsigned long cl
 		return -ENOMEM;
 	atomic_set(&sig->count, 1);
 	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->threadgroup_fork_lock);
+#endif
 	return 0;
 }
 
@@ -1074,7 +1077,7 @@ static struct task_struct *copy_process(
 	monotonic_to_bootbased(&p->real_start_time);
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	cgroup_fork(p);
+	cgroup_fork(p, clone_flags);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
  	if (IS_ERR(p->mempolicy)) {
@@ -1292,7 +1295,7 @@ static struct task_struct *copy_process(
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
-	cgroup_post_fork(p);
+	cgroup_post_fork(p, clone_flags);
 	perf_counter_fork(p);
 	return p;
 
@@ -1324,7 +1327,7 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
-	cgroup_exit(p, cgroup_callbacks_done);
+	cgroup_fork_failed(p, cgroup_callbacks_done, clone_flags);
 	delayacct_tsk_free(p);
 	if (p->binfmt)
 		module_put(p->binfmt->module);
_

Patches currently in -mm which might be from bblum@google.com are

cgroups-add-a-read-only-procs-file-similar-to-tasks-that-shows-only-unique-tgids.patch
cgroups-ensure-correct-concurrent-opening-reading-of-pidlists-across-pid-namespaces.patch
cgroups-use-vmalloc-for-large-cgroups-pidlist-allocations.patch
cgroups-change-css_set-freeing-mechanism-to-be-under-rcu.patch
cgroups-let-ss-can_attach-and-ss-attach-do-whole-threadgroups-at-a-time.patch
cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch
cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup-fix.patch
cgroups-add-ability-to-move-all-threads-in-a-process-to-a-new-cgroup-atomically.patch



* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
  2009-08-20 21:14 + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree akpm
@ 2009-08-21 10:26 ` Oleg Nesterov
  2009-08-21 10:45   ` Oleg Nesterov
  0 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2009-08-21 10:26 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, bblum, ebiederm, lizf, matthltc, menage

On 08/20, Andrew Morton wrote:
>
> Subject: cgroups: add functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
> From: Ben Blum <bblum@google.com>
>
> Add an rwsem that lives in a threadgroup's sighand_struct (next to the
> sighand's atomic count, to piggyback on its cacheline), and two functions
> in kernel/cgroup.c (for now) for easily+safely obtaining and releasing it.

Sorry. Currently I have no time to read these patches. Absolutely :/

But the very first change I noticed outside of cgroups.[ch] looks very wrong,

> +struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
> +{
> +	struct sighand_struct *sighand;
> +	struct task_struct *p;
> +
> +	/* tasklist lock protects sighand_struct's disappearance in exit(). */
> +	read_lock(&tasklist_lock);
> +	if (likely(tsk->sighand)) {
> +		/* simple case - check the thread we were given first */
> +		sighand = tsk->sighand;
> +	} else {
> +		sighand = NULL;
> +		/*
> +		 * tsk is exiting; try to find another thread in the group
> +		 * whose sighand pointer is still alive.
> +		 */
> +		rcu_read_lock();
> +		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {

If ->sighand == NULL we can't use list_for_each_entry_rcu(->thread_group),
and rcu_read_lock() can't help.

The task was removed from ->thread_group; its ->next points to nowhere.

list_for_each_rcu(head) can _only_ work if we can trust head->next: it should
point either to "head" (list_empty) or to a valid entry.

Please correct me if I missed something.

Otherwise, please send the changes which touch the process-management
code separately. And please do not forget to CC people who work with
this code ;)

Oleg.



* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
  2009-08-21 10:26 ` + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch " Oleg Nesterov
@ 2009-08-21 10:45   ` Oleg Nesterov
  2009-08-21 23:37     ` Paul Menage
  0 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2009-08-21 10:45 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, bblum, ebiederm, lizf, matthltc, menage

In case I wasn't clear.

Let's suppose we have subthreads T1 and T2, and we have a reference to T1.
T1->thread_group->next == T2.

T1 dies, T1->thread_group->next is still T2.

T2 dies, an RCU grace period passes, and its memory is freed and re-used.
But T1->thread_group->next is still T2.

Now we call threadgroup_fork_lock(T1); it sees T1->sighand == NULL and does

	rcu_read_lock();
	list_for_each_entry_rcu(T1->thread_group);

T1->thread_group->next points to nowhere.


Once again, I didn't actually read these patches, perhaps I missed something.

Oleg.

On 08/21, Oleg Nesterov wrote:
>
> On 08/20, Andrew Morton wrote:
> >
> > Subject: cgroups: add functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
> > From: Ben Blum <bblum@google.com>
> >
> > Add an rwsem that lives in a threadgroup's sighand_struct (next to the
> > sighand's atomic count, to piggyback on its cacheline), and two functions
> > in kernel/cgroup.c (for now) for easily+safely obtaining and releasing it.
> 
> Sorry. Currently I have no time to read these patches. Absolutely :/
> 
> But the very first change I noticed outside of cgroups.[ch] looks very wrong,
> 
> > +struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
> > +{
> > +	struct sighand_struct *sighand;
> > +	struct task_struct *p;
> > +
> > +	/* tasklist lock protects sighand_struct's disappearance in exit(). */
> > +	read_lock(&tasklist_lock);
> > +	if (likely(tsk->sighand)) {
> > +		/* simple case - check the thread we were given first */
> > +		sighand = tsk->sighand;
> > +	} else {
> > +		sighand = NULL;
> > +		/*
> > +		 * tsk is exiting; try to find another thread in the group
> > +		 * whose sighand pointer is still alive.
> > +		 */
> > +		rcu_read_lock();
> > +		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
> 
> If ->sighand == NULL we can't use list_for_each_entry_rcu(->thread_group),
> and rcu_read_lock() can't help.
> 
> The task was removed from ->thread_group; its ->next points to nowhere.
> 
> list_for_each_rcu(head) can _only_ work if we can trust head->next: it should
> point either to "head" (list_empty) or to a valid entry.
> 
> Please correct me if I missed something.
> 
> Otherwise, please send the changes which touch the process-management
> code separately. And please do not forget to CC people who work with
> this code ;)
> 
> Oleg.



* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
  2009-08-21 10:45   ` Oleg Nesterov
@ 2009-08-21 23:37     ` Paul Menage
  2009-08-22 13:09       ` Oleg Nesterov
  0 siblings, 1 reply; 20+ messages in thread
From: Paul Menage @ 2009-08-21 23:37 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: akpm, linux-kernel, bblum, ebiederm, lizf, matthltc

On Fri, Aug 21, 2009 at 3:45 AM, Oleg Nesterov<oleg@redhat.com> wrote:
> In case I wasn't clear.
>
> Let's suppose we have subthreads T1 and T2, and we have a reference to T1.

In this case, T1 is also the thread group leader.

And we hold tasklist_lock around the entire operation. (So the
rcu_read_lock() call is probably a red herring - Li Zefan already
suggested that it be removed).

But you're saying that this could still be a problem if tsk exits before
we even get to this point?

My impression was that if the thread group leader exits, it hangs
around (still attached to its thread group list) until all its threads
have exited. In which case as long as we're holding tasklist_lock, the
thread group list should remain valid.

Paul


* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
  2009-08-21 23:37     ` Paul Menage
@ 2009-08-22 13:09       ` Oleg Nesterov
  2009-08-22 13:28         ` Paul Menage
       [not found]         ` <20090822130952.GA4240-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 2 replies; 20+ messages in thread
From: Oleg Nesterov @ 2009-08-22 13:09 UTC (permalink / raw)
  To: Paul Menage; +Cc: akpm, linux-kernel, bblum, ebiederm, lizf, matthltc

On 08/21, Paul Menage wrote:
>
> On Fri, Aug 21, 2009 at 3:45 AM, Oleg Nesterov<oleg@redhat.com> wrote:
> > In case I wasn't clear.
> >
> > Let's suppose we have subthreads T1 and T2, and we have a reference to T1.
>
> In this case, T1 is also the thread group leader.

You forced me to take a look at the next patch,
cgroups-add-ability-to-move-all-threads-in-a-process-to-a-new-cgroup-atomically.patch
which uses this helper ;) please see below.

> And we hold tasklist_lock around the entire operation. (So the
> rcu_read_lock() call is probably a red herring - Li Zefan already
> suggested that it be removed).

Yes, rcu_read_lock() is not needed under tasklist to iterate over
->thread_group.

> But you're saying that could still be a problem if tsk exits  before
> we even get to this point?
>
> My impression was that if the thread group leader exits, it hangs
> around (still attached to its thread group list) until all its threads
> have exited.

Yes, unless a non-leader execs; in that case the leader can die before a
sub-thread, and the execing thread becomes the new leader.

IOW: yes, ->group_leader dies last, but exec can change ->group_leader.

> In which case as long as we're holding tasklist_lock, the
> thread group list should remain valid.

Only if it was valid before we take tasklist.



OK, cgroups-add-ability-to-move-all-threads-in-a-process-to-a-new-cgroup-atomically.patch
does

	threadgroup_sighand = threadgroup_fork_lock(leader);

	rcu_read_lock();
	list_for_each_entry_rcu(tsk, &leader->thread_group, thread_group)

and this is equally wrong afaics. Hmm, and the other similar
list_for_each_entry_rcu's don't look right. This code can do something
like

	rcu_read_lock();
	if (!tsk->sighand)	// tsk is leader or not, doesn't matter
		fail;
	list_for_each_rcu(tsk->thread_group) {}
	rcu_read_unlock();

though.
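
Spelled out a little, the pattern sketched above might look like this (an
illustrative fragment only; the surrounding function and error value are
assumptions):

	rcu_read_lock();
	if (!tsk->sighand) {
		/* whole group is exiting; the list head can't be trusted */
		rcu_read_unlock();
		return -ESRCH;
	}
	list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
		/* ... examine each live sub-thread ... */
	}
	rcu_read_unlock();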



And why do we need sighand->threadgroup_fork_lock? I guess, to protect
against clone(CLONE_THREAD).

But threadgroup_fork_lock() has another problem. Even if the process
P is single-threaded, I can't see how ->threadgroup_fork_lock works.

threadgroup_fork_lock() bumps P->sighand->count. If P execs, it will
notice sighand->count != 1 and switch to another ->sighand.

Oleg.



* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
  2009-08-22 13:09       ` Oleg Nesterov
@ 2009-08-22 13:28         ` Paul Menage
       [not found]         ` <20090822130952.GA4240-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  1 sibling, 0 replies; 20+ messages in thread
From: Paul Menage @ 2009-08-22 13:28 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: akpm, linux-kernel, bblum, ebiederm, lizf, matthltc

On Sat, Aug 22, 2009 at 6:09 AM, Oleg Nesterov<oleg@redhat.com> wrote:
>
>
> And why do we need sighand->threadgroup_fork_lock? I guess, to protect
> against clone(CLONE_THREAD).

Right - we want to be able to atomically move all the threads in the
thread group into a new cgroup, without leaving any behind if we
happen to race with a clone(CLONE_THREAD).

Putting the lock in the sighand structure seemed like an appropriate
place since it's involved in existing clone() synchronization.

>
> threadgroup_fork_lock() bumps P->sighand->count. If P execs, it will
> notice sighand->count != 1 and switch to another ->sighand.

So maybe we should also down_read(threadgroup_fork_lock) in the exec
path? That would prevent a child thread from execing and taking over
the group leadership, so it would remain safe to iterate over the
group leader's thread list.
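
A rough sketch of that idea (hypothetical, not from any posted patch; the
exact placement in the exec path is an assumption):

	/*
	 * Pin the sighand we started exec with and hold its fork lock for
	 * reading across the point where group leadership can change, so a
	 * writer iterating the old leader's thread list cannot race with us.
	 * The pointer is saved first because exec may switch current->sighand.
	 */
	struct sighand_struct *oldsighand = current->sighand;

	down_read(&oldsighand->threadgroup_fork_lock);
	/* ... de_thread()-style leadership/sighand changeover happens here ... */
	up_read(&oldsighand->threadgroup_fork_lock);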

Paul



* Re: + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree
@ 2010-01-03 19:06             ` Ben Blum
  0 siblings, 0 replies; 20+ messages in thread
From: Ben Blum @ 2010-01-03 19:06 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Paul Menage, akpm, linux-kernel, bblum, ebiederm, lizf, matthltc,
	containers, bblum

On Sat, Aug 22, 2009 at 03:09:52PM +0200, Oleg Nesterov wrote:
> 
> OK, cgroups-add-ability-to-move-all-threads-in-a-process-to-a-new-cgroup-atomically.patch
> does
> 
> 	threadgroup_sighand = threadgroup_fork_lock(leader);
> 
> 	rcu_read_lock();
> 	list_for_each_entry_rcu(tsk, &leader->thread_group, thread_group)
> 
> and this is equally wrong afaics. Hmm, and the other similar
> list_for_each_entry_rcu's don't look right. This code can do something
> like
> 
> 	rcu_read_lock();
> 	if (!tsk->sighand)	// tsk is leader or not, doesn't matter
> 		fail;
> 	list_for_each_rcu(tsk->thread_group) {}
> 	rcu_read_unlock();
> 
> though.
> 
> 
> 
> And why do we need sighand->threadgroup_fork_lock? I guess, to protect
> against clone(CLONE_THREAD).
> 
> But threadgroup_fork_lock() has another problem. Even if the process
> P is single-threaded, I can't see how ->threadgroup_fork_lock works.
> 
> threadgroup_fork_lock() bumps P->sighand->count. If P execs, it will
> notice sighand->count != 1 and switch to another ->sighand.
> 
> Oleg.

So how about this: each time we take tasklist_lock to iterate over
thread_group (once when getting the sighand, once when moving all the tasks),
check whether we raced with exec. The two problems are 1) group_leader
changes - we'll need to find the new leader's task_struct anyway - which means
we can't iterate over thread_group, and 2) sighand changes after we take
the old one, which means we've taken a useless lock.

I put together draft revisions of the old patches that check for racing
with exec in both cases and, if so, return EAGAIN. I have the wrapper
function cgroup_procs_write loop on the return value, but it could
also just give EAGAIN back to userspace. Hopefully the code is safe
and sane this time :)
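
A sketch of the retry loop described above (the helper name and its
"threadgroup" flag are assumptions, not confirmed by this excerpt):

	/* retry the whole-threadgroup attach while we keep racing with exec */
	do {
		ret = attach_task_by_pid(cgrp, tgid, true);
	} while (ret == -EAGAIN);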

-- bblum

---
 Documentation/cgroups/cgroups.txt |    7
 include/linux/cgroup.h            |   14 -
 include/linux/init_task.h         |    9
 include/linux/sched.h             |   15 +
 kernel/cgroup.c                   |  519 ++++++++++++++++++++++++++++++++++----
 kernel/fork.c                     |    9
 6 files changed, 524 insertions(+), 49 deletions(-)



* [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
@ 2010-01-03 19:07                 ` Ben Blum
  0 siblings, 0 replies; 20+ messages in thread
From: Ben Blum @ 2010-01-03 19:07 UTC (permalink / raw)
  To: Oleg Nesterov, Paul Menage, akpm, linux-kernel, bblum, ebiederm,
	lizf, matthltc, containers, bblum

[-- Attachment #1: cgroup-threadgroup-fork-lock.patch --]
[-- Type: text/plain, Size: 10523 bytes --]

Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup

From: Ben Blum <bblum@andrew.cmu.edu>

This patch adds an rwsem that lives in a threadgroup's sighand_struct (next to
the sighand's atomic count, to piggyback on its cacheline), and two functions
in kernel/cgroup.c (for now) for easily+safely obtaining and releasing it. If
another part of the kernel later wants to use such a locking mechanism, the
CONFIG_CGROUPS ifdefs should be changed to a higher-up flag that CGROUPS and
the other system would both depend on, and the lock/unlock functions could be
moved to sched.c or so.

This is a pre-patch for cgroups-procs-write.patch.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
---
 include/linux/cgroup.h    |   14 +++++--
 include/linux/init_task.h |    9 ++++
 include/linux/sched.h     |   15 +++++++
 kernel/cgroup.c           |   93 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/fork.c             |    9 +++-
 5 files changed, 131 insertions(+), 9 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9be4c22..2eb54bb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -30,10 +30,12 @@ extern int cgroup_init(void);
 extern void cgroup_lock(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
-extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_fork_callbacks(struct task_struct *p);
-extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern void cgroup_fork_failed(struct task_struct *p, int run_callbacks,
+			       unsigned long clone_flags);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 extern int cgroup_load_subsys(struct cgroup_subsys *ss);
@@ -580,10 +582,14 @@ unsigned short css_depth(struct cgroup_subsys_state *css);
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
-static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork(struct task_struct *p,
+			       unsigned long clone_flags) {}
 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
-static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p,
+				    unsigned long clone_flags) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+static inline void cgroup_fork_failed(struct task_struct *p, int callbacks,
+				      unsigned long clone_flags) {}
 
 static inline void cgroup_lock(void) {}
 static inline void cgroup_unlock(void) {}
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 8ed0abf..aaa4b9c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -41,7 +41,16 @@ extern struct nsproxy init_nsproxy;
 	INIT_IPC_NS(ipc_ns)						\
 }
 
+#ifdef CONFIG_CGROUPS
+# define INIT_THREADGROUP_FORK_LOCK(sighand)				\
+	.threadgroup_fork_lock = 					\
+		__RWSEM_INITIALIZER(sighand.threadgroup_fork_lock),
+#else
+# define INIT_THREADGROUP_FORK_LOCK(sighand)
+#endif
+
 #define INIT_SIGHAND(sighand) {						\
+	INIT_THREADGROUP_FORK_LOCK(sighand)				\
 	.count		= ATOMIC_INIT(1), 				\
 	.action		= { { { .sa_handler = NULL, } }, },		\
 	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 23b26c7..10a22a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -475,6 +475,21 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
 struct sighand_struct {
+#ifdef CONFIG_CGROUPS
+	/*
+	 * The threadgroup_fork_lock is used to prevent any threads in a
+	 * threadgroup from forking with CLONE_THREAD while held for writing,
+	 * used for threadgroup-wide operations that are fork-sensitive. It
+	 * lives here next to sighand.count as a cacheline optimization.
+	 *
+	 * TODO: if anybody besides cgroups uses this lock, change the
+	 * CONFIG_CGROUPS to a higher-up CONFIG_* that the other user and
+	 * cgroups would both depend upon. Also, they'll want to move where
+	 * the readlock happens - it currently lives in kernel/cgroup.c in
+	 * cgroup_{fork,post_fork,fork_failed}().
+	 */
+	struct rw_semaphore	threadgroup_fork_lock;
+#endif
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cc2e1f6..99782a0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1623,6 +1623,71 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 
 /**
+ * threadgroup_fork_lock - block all CLONE_THREAD forks in the threadgroup
+ * @tsk: the task whose threadgroup should be locked
+ *
+ * Takes (for writing) the threadgroup_fork_lock rwsem in the threadgroup's
+ * sighand_struct, found by searching the threadgroup for a live thread.
+ * Returns the sighand_struct that should be given to threadgroup_fork_unlock,
+ * or -ESRCH if all threads in the group are exiting and have cleared their
+ * sighand pointers, or -EAGAIN if tsk is not the threadgroup leader.
+ */
+struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
+{
+	struct sighand_struct *sighand;
+	struct task_struct *p;
+
+	/* tasklist lock protects sighand_struct's disappearance in exit(). */
+	read_lock(&tasklist_lock);
+
+	/* make sure the threadgroup's state is sane before we proceed */
+	if (unlikely(!thread_group_leader(tsk))) {
+		/* a race with de_thread() stripped us of our leadership */
+		read_unlock(&tasklist_lock);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	/* now try to find a sighand */
+	if (likely(tsk->sighand)) {
+		sighand = tsk->sighand;
+	} else {
+		sighand = ERR_PTR(-ESRCH);
+		/*
+		 * tsk is exiting; try to find another thread in the group
+		 * whose sighand pointer is still alive.
+		 */
+		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
+			if (p->sighand) {
+				sighand = p->sighand;
+				break;
+			}
+		}
+	}
+	/* prevent sighand from vanishing before we let go of tasklist_lock */
+	if (likely(!IS_ERR(sighand)))
+		atomic_inc(&sighand->count);
+
+	/* done searching. */
+	read_unlock(&tasklist_lock);
+
+	if (likely(!IS_ERR(sighand)))
+		down_write(&sighand->threadgroup_fork_lock);
+	return sighand;
+}
+
+/**
+ * threadgroup_fork_unlock - let threadgroup resume CLONE_THREAD forks.
+ * @sighand: the threadgroup's sighand that threadgroup_fork_lock gave back
+ *
+ * Lets go of the threadgroup_fork_lock, and drops the sighand reference.
+ */
+void threadgroup_fork_unlock(struct sighand_struct *sighand)
+{
+	up_write(&sighand->threadgroup_fork_lock);
+	__cleanup_sighand(sighand);
+}
+
+/**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
  * @tsk: the task to be attached
@@ -3713,8 +3778,10 @@ static const struct file_operations proc_cgroupstats_operations = {
  * At the point that cgroup_fork() is called, 'current' is the parent
  * task, and the passed argument 'child' points to the child task.
  */
-void cgroup_fork(struct task_struct *child)
+void cgroup_fork(struct task_struct *child, unsigned long clone_flags)
 {
+	if (clone_flags & CLONE_THREAD)
+		down_read(&current->sighand->threadgroup_fork_lock);
 	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
@@ -3756,7 +3823,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
  * with the first call to cgroup_iter_start() - to guarantee that the
  * new task ends up on its list.
  */
-void cgroup_post_fork(struct task_struct *child)
+void cgroup_post_fork(struct task_struct *child, unsigned long clone_flags)
 {
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
@@ -3766,6 +3833,8 @@ void cgroup_post_fork(struct task_struct *child)
 		task_unlock(child);
 		write_unlock(&css_set_lock);
 	}
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->sighand->threadgroup_fork_lock);
 }
 /**
  * cgroup_exit - detach cgroup from exiting task
@@ -3841,6 +3910,26 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }
 
 /**
+ * cgroup_fork_failed - undo operations for fork failure
+ * @tsk: pointer to task_struct of the failed child process
+ * @run_callbacks: run exit callbacks?
+ *
+ * Description: Undo cgroup operations after cgroup_fork in fork failure.
+ *
+ * We release the read lock that was taken in cgroup_fork(), since it is
+ * supposed to be dropped in cgroup_post_fork in the success case. The other
+ * thing that wants to be done is detaching the failed child task from the
+ * cgroup, so we wrap cgroup_exit.
+ */
+void cgroup_fork_failed(struct task_struct *tsk, int run_callbacks,
+			unsigned long clone_flags)
+{
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->sighand->threadgroup_fork_lock);
+	cgroup_exit(tsk, run_callbacks);
+}
+
+/**
  * cgroup_clone - clone the cgroup the given subsystem is attached to
  * @tsk: the task to be moved
  * @subsys: the given subsystem
diff --git a/kernel/fork.c b/kernel/fork.c
index 404e6ca..daf5967 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -809,6 +809,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 		return -ENOMEM;
 	atomic_set(&sig->count, 1);
 	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->threadgroup_fork_lock);
+#endif
 	return 0;
 }
 
@@ -1091,7 +1094,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	monotonic_to_bootbased(&p->real_start_time);
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	cgroup_fork(p);
+	cgroup_fork(p, clone_flags);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
  	if (IS_ERR(p->mempolicy)) {
@@ -1316,7 +1319,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
-	cgroup_post_fork(p);
+	cgroup_post_fork(p, clone_flags);
 	perf_event_fork(p);
 	return p;
 
@@ -1350,7 +1353,7 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
-	cgroup_exit(p, cgroup_callbacks_done);
+	cgroup_fork_failed(p, cgroup_callbacks_done, clone_flags);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:


* [RFC] [PATCH 2/2] cgroups: make procs file writable
  2010-01-03 19:06             ` Ben Blum
@ 2010-01-03 19:09                 ` Ben Blum
  -1 siblings, 0 replies; 20+ messages in thread
From: Ben Blum @ 2010-01-03 19:09 UTC (permalink / raw)
  To: Oleg Nesterov, Paul Menage,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bblum-hpIqsD4AKlfQT0dZR+AlfA, ebiederm-aS9lmoZGLiVWk0Htik3J/w,
	lizf

[-- Attachment #1: cgroup-procs-writable.patch --]
[-- Type: text/plain, Size: 17301 bytes --]

Makes procs file writable to move all threads by tgid at once

From: Ben Blum <bblum-OM76b2Iv3yLQjUSlxSEPGw@public.gmane.org>

This patch adds functionality that enables users to move all threads in a
threadgroup at once to a cgroup by writing the tgid to the 'cgroup.procs'
file. This current implementation makes use of a per-threadgroup rwsem that's
taken for reading in the fork() path to prevent newly forking threads within
the threadgroup from "escaping" while the move is in progress.

Signed-off-by: Ben Blum <bblum-OM76b2Iv3yLQjUSlxSEPGw@public.gmane.org>
---
 Documentation/cgroups/cgroups.txt |    7 +
 kernel/cgroup.c                   |  426 ++++++++++++++++++++++++++++++++++---
 2 files changed, 393 insertions(+), 40 deletions(-)

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 7527bac..a5f1e6a 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -9,6 +9,7 @@ Portions Copyright (C) 2004 BULL SA.
 Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
 Modified by Paul Jackson <pj-sJ/iWh9BUns@public.gmane.org>
 Modified by Christoph Lameter <clameter-sJ/iWh9BUns@public.gmane.org>
+Modified by Ben Blum <bblum-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
 
 CONTENTS:
 =========
@@ -415,6 +416,12 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the pid of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to
+be attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
 2.3 Mounting hierarchies by name
 --------------------------------
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 99782a0..f79d70b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1622,6 +1622,87 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 	return 0;
 }
 
+/*
+ * cgroup_task_migrate - move a task from one cgroup to another.
+ *
+ * 'guarantee' is set if the caller promises that a new css_set for the task
+ * will already exist. If not set, this function might sleep, and can fail
+ * with -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ */
+static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+			       struct task_struct *tsk, int guarantee)
+{
+	struct css_set *oldcg;
+	struct css_set *newcg;
+
+	/*
+	 * get old css_set. we need to take task_lock and refcount it, because
+	 * an exiting task can change its css_set to init_css_set and drop its
+	 * old one without taking cgroup_mutex.
+	 */
+	task_lock(tsk);
+	oldcg = tsk->cgroups;
+	get_css_set(oldcg);
+	task_unlock(tsk);
+
+	/*
+	 * locate or allocate a new css_set for this task. 'guarantee' tells
+	 * us whether or not we are sure that a new css_set already exists;
+	 * in that case, we are not allowed to fail or sleep, as we won't need
+	 * malloc.
+	 */
+	if (guarantee) {
+		/*
+		 * our caller promises us that the css_set we want already
+		 * exists, so we use find_existing_css_set directly.
+		 */
+		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+		read_lock(&css_set_lock);
+		newcg = find_existing_css_set(oldcg, cgrp, template);
+		BUG_ON(!newcg);
+		get_css_set(newcg);
+		read_unlock(&css_set_lock);
+	} else {
+		might_sleep();
+		/* find_css_set will give us newcg already referenced. */
+		newcg = find_css_set(oldcg, cgrp);
+		if (!newcg) {
+			put_css_set(oldcg);
+			return -ENOMEM;
+		}
+	}
+	put_css_set(oldcg);
+
+	/*
+	 * we cannot move a task that's declared itself as exiting, as once
+	 * PF_EXITING is set, the tsk->cgroups pointer is no longer safe.
+	 */
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
+	write_unlock(&css_set_lock);
+
+	/*
+	 * We just gained a reference on oldcg by taking it from the task. As
+	 * trading it for newcg is protected by cgroup_mutex, we're safe to
+	 * drop it here; it will be freed under RCU.
+	 */
+	put_css_set(oldcg);
+
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	return 0;
+}
+
 /**
  * threadgroup_fork_lock - block all CLONE_THREAD forks in the threadgroup
  * @tsk: the task whose threadgroup should be locked
@@ -1697,11 +1778,9 @@ void threadgroup_fork_unlock(struct sighand_struct *sighand)
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval = 0;
+	int retval;
 	struct cgroup_subsys *ss;
 	struct cgroup *oldcgrp;
-	struct css_set *cg;
-	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
 
 	/* Nothing to do if the task is already in that cgroup */
@@ -1717,75 +1796,326 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		}
 	}
 
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
+	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, 0);
+	if (retval)
+		return retval;
+
+	for_each_subsys(root, ss) {
+		if (ss->attach)
+			ss->attach(ss, cgrp, oldcgrp, tsk, false);
+	}
+
+	synchronize_rcu();
+
 	/*
-	 * Locate or allocate a new css_set for this task,
-	 * based on its final set of cgroups
+	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
+	 * is no longer empty.
 	 */
+	cgroup_wakeup_rmdir_waiter(cgrp);
+	return 0;
+}
+
+/*
+ * cgroup_attach_proc works in two stages, the first of which prefetches all
+ * new css_sets needed (to make sure we have enough memory before committing
+ * to the move) and stores them in a list, of entries of the following type.
+ * TODO: possible optimization: use css_set->rcu_head for chaining instead
+ */
+struct cg_list_entry {
+	struct css_set *cg;
+	struct list_head links;
+};
+
+static bool css_set_check_fetched(struct cgroup *cgrp,
+				  struct task_struct *tsk, struct css_set *cg,
+				  struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+
+	read_lock(&css_set_lock);
+	newcg = find_existing_css_set(cg, cgrp, template);
+	if (newcg)
+		get_css_set(newcg);
+	read_unlock(&css_set_lock);
+
+	/* doesn't exist at all? */
+	if (!newcg)
+		return false;
+	/* see if it's already in the list */
+	list_for_each_entry(cg_entry, newcg_list, links) {
+		if (cg_entry->cg == newcg) {
+			put_css_set(newcg);
+			return true;
+		}
+	}
+
+	/* not found */
+	put_css_set(newcg);
+	return false;
+}
+
+/*
+ * Find the new css_set and store it in the list in preparation for moving
+ * the given task to the given cgroup. Returns 0 on success, -ENOMEM if we
+ * run out of memory.
+ */
+static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
+			    struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	/* ensure a new css_set will exist for this thread */
 	newcg = find_css_set(cg, cgrp);
-	put_css_set(cg);
 	if (!newcg)
 		return -ENOMEM;
+	/* add new element to list */
+	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
+	if (!cg_entry) {
+		put_css_set(newcg);
+		return -ENOMEM;
+	}
+	cg_entry->cg = newcg;
+	list_add(&cg_entry->links, newcg_list);
+	return 0;
+}
 
-	task_lock(tsk);
-	if (tsk->flags & PF_EXITING) {
+/**
+ * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * @cgrp: the cgroup to attach to
+ * @leader: the threadgroup leader task_struct of the group to be attached
+ *
+ * Call holding cgroup_mutex. Will take task_lock of each thread in leader's
+ * threadgroup individually in turn.
+ */
+int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+{
+	int retval;
+	struct cgroup_subsys *ss;
+	struct cgroup *oldcgrp;
+	struct css_set *oldcg;
+	struct cgroupfs_root *root = cgrp->root;
+	/* threadgroup list cursor */
+	struct task_struct *tsk;
+	/*
+	 * we need to make sure we have css_sets for all the tasks we're
+	 * going to move -before- we actually start moving them, so that in
+	 * case we get an ENOMEM we can bail out before making any changes.
+	 */
+	struct list_head newcg_list;
+	struct cg_list_entry *cg_entry;
+	/* needed for locking the threadgroup */
+	struct sighand_struct *threadgroup_sighand;
+
+	/*
+	 * because of possible races with de_thread() we can't distinguish
+	 * between the case where the user gives a non-leader tid and the case
+	 * where it changes out from under us.
+	 */
+	leader = leader->group_leader;
+
+	/*
+	 * check that we can legitimately attach to the cgroup.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, leader, true);
+			if (retval)
+				return retval;
+		}
+	}
+
+	/*
+	 * step 1: make sure css_sets exist for all threads to be migrated.
+	 * we use find_css_set, which allocates a new one if necessary.
+	 */
+	INIT_LIST_HEAD(&newcg_list);
+	oldcgrp = task_cgroup_from_root(leader, root);
+	if (cgrp != oldcgrp) {
+		/* get old css_set */
+		task_lock(leader);
+		if (leader->flags & PF_EXITING) {
+			task_unlock(leader);
+			goto prefetch_loop;
+		}
+		oldcg = leader->cgroups;
+		get_css_set(oldcg);
+		task_unlock(leader);
+		/* acquire new one */
+		retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+		put_css_set(oldcg);
+		if (retval)
+			goto list_teardown;
+	}
+prefetch_loop:
+	rcu_read_lock();
+	/*
+	 * if we need to fetch a new css_set for this task, we must exit the
+	 * rcu_read section because allocating it can sleep. afterwards, we'll
+	 * need to restart iteration on the threadgroup list - the whole thing
+	 * will be O(nm) in the number of threads and css_sets; as the typical
+	 * case only has one css_set for all of them, usually O(n).
+	 */
+	list_for_each_entry_rcu(tsk, &leader->thread_group, thread_group) {
+		/* nothing to do if this task is already in the cgroup */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* get old css_set pointer */
+		task_lock(tsk);
+		if (tsk->flags & PF_EXITING) {
+			/* ignore this task if it's going away */
+			task_unlock(tsk);
+			continue;
+		}
+		oldcg = tsk->cgroups;
+		get_css_set(oldcg);
 		task_unlock(tsk);
-		put_css_set(newcg);
-		return -ESRCH;
+		/* see if the new one for us is already in the list? */
+		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
+			/* was already there, nothing to do. */
+			put_css_set(oldcg);
+		} else {
+			/* we don't already have it. get new one. */
+			rcu_read_unlock();
+			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+			put_css_set(oldcg);
+			if (retval)
+				goto list_teardown;
+			/* begin iteration again. */
+			goto prefetch_loop;
+		}
 	}
-	rcu_assign_pointer(tsk->cgroups, newcg);
-	task_unlock(tsk);
+	rcu_read_unlock();
 
-	/* Update the css_set linked lists if we're using them */
-	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list)) {
-		list_del(&tsk->cg_list);
-		list_add(&tsk->cg_list, &newcg->tasks);
+	/*
+	 * step 2: now that we're guaranteed success wrt the css_sets, proceed
+	 * to move all tasks to the new cgroup. Even if the threadgroup leader
+	 * is PF_EXITING, we still proceed to move all of its sub-threads to
+	 * the new cgroup; if everybody is PF_EXITING, we'll just end up doing
+	 * nothing, which is ok.
+	 */
+	oldcgrp = task_cgroup_from_root(leader, root);
+	/* if leader is already there, skip moving him */
+	if (cgrp != oldcgrp) {
+		retval = cgroup_task_migrate(cgrp, oldcgrp, leader, 1);
+		BUG_ON(retval != 0 && retval != -ESRCH);
 	}
-	write_unlock(&css_set_lock);
 
+	/*
+	 * now move all the rest of the threads - need to lock against
+	 * possible races with fork(). (Remember, the sighand's lock needs
+	 * to be taken outside of tasklist_lock.)
+	 */
+	threadgroup_sighand = threadgroup_fork_lock(leader);
+	if (unlikely(IS_ERR(threadgroup_sighand))) {
+		/*
+		 * this happens with either ESRCH or EAGAIN; either way, the
+		 * calling function takes care of it.
+		 */
+		retval = PTR_ERR(threadgroup_sighand);
+		goto list_teardown;
+	}
+	read_lock(&tasklist_lock);
+	/*
+	 * Finally, before we can continue, make sure the threadgroup is sane.
+	 * First, if de_thread() changed the leader, then no guarantees on the
+	 * safety of iterating leader->thread_group. Second, regardless of
+	 * leader, de_thread() can change the sighand since we grabbed a
+	 * reference on it. Either case is a race with exec() and therefore
+	 * not safe to proceed.
+	 */
+	if (!thread_group_leader(leader) ||
+	    (leader->sighand && leader->sighand != threadgroup_sighand)) {
+		retval = -EAGAIN;
+		read_unlock(&tasklist_lock);
+		threadgroup_fork_unlock(threadgroup_sighand);
+		goto list_teardown;
+	}
+
+	list_for_each_entry_rcu(tsk, &leader->thread_group, thread_group) {
+		/* leave current thread as it is if it's already there */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* we don't care whether these threads are exiting */
+		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, 1);
+		BUG_ON(retval != 0 && retval != -ESRCH);
+	}
+
+	/*
+	 * step 3: attach whole threadgroup to each subsystem
+	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
+	 * being moved, this call will need to be reworked to communicate that
+	 * information.
+	 */
 	for_each_subsys(root, ss) {
 		if (ss->attach)
-			ss->attach(ss, cgrp, oldcgrp, tsk, false);
+			ss->attach(ss, cgrp, oldcgrp, tsk, true);
 	}
-	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
-	synchronize_rcu();
-	put_css_set(cg);
+
+	/* holding these until here keeps us safe from exec() and fork(). */
+	read_unlock(&tasklist_lock);
+	threadgroup_fork_unlock(threadgroup_sighand);
 
 	/*
-	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
-	 * is no longer empty.
+	 * step 4: success! ...and cleanup
 	 */
+	synchronize_rcu();
 	cgroup_wakeup_rmdir_waiter(cgrp);
-	return 0;
+	retval = 0;
+list_teardown:
+	/* no longer need the list of css_sets, so get rid of it */
+	while (!list_empty(&newcg_list)) {
+		/* pop from the list */
+		cg_entry = list_first_entry(&newcg_list, struct cg_list_entry,
+					    links);
+		list_del(&cg_entry->links);
+		/* drop the refcount */
+		put_css_set(cg_entry->cg);
+		kfree(cg_entry);
+	}
+	/* done! */
+	return retval;
 }
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
- * held. May take task_lock of task
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will take
+ * cgroup_mutex; may take task_lock of task.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid,
+			      int attach(struct cgroup *,
+					 struct task_struct *))
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
 		if (!tsk || tsk->flags & PF_EXITING) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -ESRCH;
 		}
-
+		/*
+		 * even if we're attaching all tasks in the thread group, we
+		 * only need to check permissions on the group leader, because
+		 * even if another task has different permissions, the group
+		 * leader will have sufficient access to change it.
+		 */
 		tcred = __task_cred(tsk);
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -EACCES;
 		}
 		get_task_struct(tsk);
@@ -1795,18 +2125,34 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 		get_task_struct(tsk);
 	}
 
-	ret = cgroup_attach_task(cgrp, tsk);
+	/*
+	 * Note that the check for whether the task is its threadgroup leader
+	 * is done in cgroup_attach_proc. This means that writing 0 to the
+	 * procs file will only work if the writing task is the leader.
+	 */
+	ret = attach(cgrp, tsk);
 	put_task_struct(tsk);
+	cgroup_unlock();
 	return ret;
 }
 
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
+	return attach_task_by_pid(cgrp, pid, cgroup_attach_task);
+}
+
+static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+{
 	int ret;
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
-	ret = attach_task_by_pid(cgrp, pid);
-	cgroup_unlock();
+	do {
+		/*
+		 * Nobody lower than us can handle the EAGAIN, since if a race
+		 * with de_thread() changes the group leader, the task_struct
+		 * matching the given tgid will have changed, and we'll need
+		 * to find it again.
+		 */
+		ret = attach_task_by_pid(cgrp, tgid, cgroup_attach_proc);
+	} while (ret == -EAGAIN);
 	return ret;
 }
 
@@ -2966,9 +3312,9 @@ static struct cftype files[] = {
 	{
 		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
 		.open = cgroup_procs_open,
-		/* .write_u64 = cgroup_procs_write, TODO */
+		.write_u64 = cgroup_procs_write,
 		.release = cgroup_pidlist_release,
-		.mode = S_IRUGO,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
 		.name = "notify_on_release",

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
  2010-01-03 19:07                 ` Ben Blum
  (?)
@ 2010-01-05 18:53                 ` Oleg Nesterov
       [not found]                   ` <20100105185330.GA17545-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  -1 siblings, 1 reply; 20+ messages in thread
From: Oleg Nesterov @ 2010-01-05 18:53 UTC (permalink / raw)
  To: Paul Menage, akpm, linux-kernel, bblum, ebiederm, lizf, matthltc,
	containers

On 01/03, Ben Blum wrote:
>
> Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup

I didn't actually read this series, but at first glance it still has
problems...

> +struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)

static?

> +{
> +	struct sighand_struct *sighand;
> +	struct task_struct *p;
> +
> +	/* tasklist lock protects sighand_struct's disappearance in exit(). */
> +	read_lock(&tasklist_lock);
> +
> +	/* make sure the threadgroup's state is sane before we proceed */
> +	if (unlikely(!thread_group_leader(tsk))) {
> +		/* a race with de_thread() stripped us of our leadership */
> +		read_unlock(&tasklist_lock);
> +		return ERR_PTR(-EAGAIN);

I don't understand how this can close the race with de_thread().

Suppose this tsk is the new leader, after de_thread() changed ->group_leader
and dropped tasklist_lock.

threadgroup_fork_lock() bumps sighand->count

de_thread() continues, notices oldsighand->count != 1 and switches
to the new ->sighand.

After that tsk can spawn other threads, but cgroup_fork() will use
newsighand->threadgroup_fork_lock while cgroup_attach_proc() relies
on oldsighand->threadgroup_fork_lock.

> +	/* now try to find a sighand */
> +	if (likely(tsk->sighand)) {
> +		sighand = tsk->sighand;
> +	} else {
> +		sighand = ERR_PTR(-ESRCH);
> +		/*
> +		 * tsk is exiting; try to find another thread in the group
> +		 * whose sighand pointer is still alive.
> +		 */
> +		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
> +			if (p->sighand) {
> +				sighand = tsk->sighand;

can't understand this "else {}" code... We hold tasklist, if the group
leader is dead (->sighand == NULL), then the whole thread group is
dead.

Even if we had another thread with ->sighand != NULL, what is the point
of "if (unlikely(!thread_group_leader(tsk)))" check then?

Oleg.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
@ 2010-01-17 20:48                       ` Ben Blum
  0 siblings, 0 replies; 20+ messages in thread
From: Ben Blum @ 2010-01-17 20:48 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Paul Menage, akpm, linux-kernel, ebiederm, lizf, matthltc,
	containers, bblum

On Tue, Jan 05, 2010 at 07:53:30PM +0100, Oleg Nesterov wrote:
> On 01/03, Ben Blum wrote:
> >
> > Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
> 
> I didn't actually read this series, but at first glance it still has
> problems...
> 
> > +struct sighand_struct *threadgroup_fork_lock(struct task_struct *tsk)
> 
> static?

sure, though in the end this is perhaps not the best place for the
function anyway. in fact, this function only does half of the job, so
a good amount of refactoring might be in order.

> 
> > +{
> > +	struct sighand_struct *sighand;
> > +	struct task_struct *p;
> > +
> > +	/* tasklist lock protects sighand_struct's disappearance in exit(). */
> > +	read_lock(&tasklist_lock);
> > +
> > +	/* make sure the threadgroup's state is sane before we proceed */
> > +	if (unlikely(!thread_group_leader(tsk))) {
> > +		/* a race with de_thread() stripped us of our leadership */
> > +		read_unlock(&tasklist_lock);
> > +		return ERR_PTR(-EAGAIN);
> 
> I don't understand how this can close the race with de_thread().
> 
> Suppose this tsk is the new leader, after de_thread() changed ->group_leader
> and dropped tasklist_lock.
> 
> threadgroup_fork_lock() bumps sighand->count
> 
> de_thread() continues, notices oldsighand->count != 1 and switches
> to the new ->sighand.
> 
> After that tsk can spawn other threads, but cgroup_fork() will use
> newsighand->threadgroup_fork_lock while cgroup_attach_proc() relies
> on oldsighand->threadgroup_fork_lock.

the race with the sighand is handled in the next patch, in attach_proc,
not in this function. this check is just to make sure that the list is
safe to iterate over, since de_thread changing group leadership could
ruin that. so in the end, there are two places where EAGAIN can happen -
one here, and one later (in the second patch).
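
For reference, the check in question is the one cgroup_attach_proc() does
under tasklist_lock in patch 2/2; condensed, it reads:

	read_lock(&tasklist_lock);
	/*
	 * bail out (the caller retries on -EAGAIN) if de_thread() changed
	 * the leader, or swapped the sighand out from under the reference
	 * taken by threadgroup_fork_lock().
	 */
	if (!thread_group_leader(leader) ||
	    (leader->sighand && leader->sighand != threadgroup_sighand)) {
		retval = -EAGAIN;
		read_unlock(&tasklist_lock);
		threadgroup_fork_unlock(threadgroup_sighand);
		goto list_teardown;
	}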

> 
> > +	/* now try to find a sighand */
> > +	if (likely(tsk->sighand)) {
> > +		sighand = tsk->sighand;
> > +	} else {
> > +		sighand = ERR_PTR(-ESRCH);
> > +		/*
> > +		 * tsk is exiting; try to find another thread in the group
> > +		 * whose sighand pointer is still alive.
> > +		 */
> > +		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
> > +			if (p->sighand) {
> > +				sighand = tsk->sighand;
> 
> can't understand this "else {}" code... We hold tasklist, if the group
> leader is dead (->sighand == NULL), then the whole thread group is
> dead.
> 
> Even if we had another thread with ->sighand != NULL, what is the point
> of "if (unlikely(!thread_group_leader(tsk)))" check then?

doesn't the group leader stay on the threadgroup list even when it dies?
sighand can be null if the group leader has exited, but other threads
are still running.

> 
> Oleg.
> 

hope that makes more sense. I'd like to have the code between these two
patches refactored, but first want to make sure it's correct.

-- bblum

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
  2010-01-17 20:48                       ` Ben Blum
  (?)
  (?)
@ 2010-03-22 10:22                       ` Oleg Nesterov
       [not found]                         ` <20100322102247.GA8363-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  2010-03-22 23:57                         ` Paul Menage
  -1 siblings, 2 replies; 20+ messages in thread
From: Oleg Nesterov @ 2010-03-22 10:22 UTC (permalink / raw)
  To: Ben Blum
  Cc: Paul Menage, akpm, linux-kernel, ebiederm, lizf, matthltc, containers

On 01/17, Ben Blum wrote:
>
> On Tue, Jan 05, 2010 at 07:53:30PM +0100, Oleg Nesterov wrote:
> >
> > I don't understand how this can close the race with de_thread().
> > ...
>
> the race with the sighand is handled in the next patch, in attach_proc,
> not in this function.

OK. I didn't verify this, the patches don't apply to 2.6.32-rc, but this
doesn't matter. Please see below.

> > > +	/* now try to find a sighand */
> > > +	if (likely(tsk->sighand)) {
> > > +		sighand = tsk->sighand;
> > > +	} else {
> > > +		sighand = ERR_PTR(-ESRCH);
> > > +		/*
> > > +		 * tsk is exiting; try to find another thread in the group
> > > +		 * whose sighand pointer is still alive.
> > > +		 */
> > > +		list_for_each_entry_rcu(p, &tsk->thread_group, thread_group) {
> > > +			if (p->sighand) {
> > > +				sighand = tsk->sighand;
> >
> > can't understand this "else {}" code... We hold tasklist, if the group
> > leader is dead (->sighand == NULL), then the whole thread group is
> > dead.
> >
> > Even if we had another thread with ->sighand != NULL, what is the point
> > of "if (unlikely(!thread_group_leader(tsk)))" check then?
>
> doesn't the group leader stay on the threadgroup list even when it dies?
> sighand can be null if the group leader has exited, but other threads
> are still running.

No, leader->sighand != NULL until all threads have exited.


Ben, I'd suggest you redo these patches even if they are correct.
->sighand is not the right place for the mutex/locking:

	- it is per CLONE_SIGHAND, not per process

	- we have to avoid the nasty and hard-to-test races with exec

	- we have to play with sighand->count and I really dislike this.
	  this ->count is not just a reference counter, look at
	  unshare_sighand(). Yes, this is fake, but still.

Please use ->signal instead. By a lucky coincidence, the lifetime rules
for the (greatly misnamed) signal_struct were changed recently in -mm.

With the recent changes, it is always safe to use task->signal. It can't
be changed, can't go away, no need to bump the counter, no races, etc.
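
A minimal sketch of what that could look like (the field and helper names
below are only illustrative, not taken from any actual tree):

	struct signal_struct {
		/* ... existing fields ... */

		/*
		 * Taken for reading in the CLONE_THREAD fork path and for
		 * writing around a whole-threadgroup attach, so no new
		 * thread can slip into the group while it is being moved.
		 */
		struct rw_semaphore threadgroup_fork_lock;
	};

	static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
	{
		down_read(&tsk->signal->threadgroup_fork_lock);
	}

	static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
	{
		up_read(&tsk->signal->threadgroup_fork_lock);
	}

	static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
	{
		down_write(&tsk->signal->threadgroup_fork_lock);
	}

	static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
	{
		up_write(&tsk->signal->threadgroup_fork_lock);
	}

The attach path would then lock leader->signal directly, with no sighand
refcounting and no ERR_PTR plumbing.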

What do you think?

Oleg.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
  2010-03-22 10:22                       ` Oleg Nesterov
       [not found]                         ` <20100322102247.GA8363-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2010-03-22 23:57                         ` Paul Menage
  1 sibling, 0 replies; 20+ messages in thread
From: Paul Menage @ 2010-03-22 23:57 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ben Blum, akpm, linux-kernel, ebiederm, lizf, matthltc, containers

On Mon, Mar 22, 2010 at 3:22 AM, Oleg Nesterov <oleg@redhat.com> wrote:
>
> Please use ->signal instead. By the lucky coincidence the lifetime rules
> for (greatly misnamed) signal_struct were changed recently in -mm.
>
> With the recent changes, it is always safe to use task->signal. It can't
> be changed, can't go away, no need to bump the counter, no races, etc.
>
> What do you think?

If signal_struct is much simpler to reason about, then using it seems
like a good idea.

Paul

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2010-03-22 23:57 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-20 21:14 + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree akpm
2009-08-21 10:26 ` + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch " Oleg Nesterov
2009-08-21 10:45   ` Oleg Nesterov
2009-08-21 23:37     ` Paul Menage
2009-08-22 13:09       ` Oleg Nesterov
2009-08-22 13:28         ` Paul Menage
     [not found]         ` <20090822130952.GA4240-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2010-01-03 19:06           ` Ben Blum
2010-01-03 19:06             ` Ben Blum
     [not found]             ` <20100103190613.GA13423-OM76b2Iv3yLQjUSlxSEPGw@public.gmane.org>
2010-01-03 19:07               ` [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup Ben Blum
2010-01-03 19:07                 ` Ben Blum
2010-01-05 18:53                 ` Oleg Nesterov
     [not found]                   ` <20100105185330.GA17545-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2010-01-17 20:48                     ` Ben Blum
2010-01-17 20:48                       ` Ben Blum
     [not found]                       ` <20100117204833.GA29596-DC+BO+AtSRVCM1neWV3AGuCmf2DRS9x2@public.gmane.org>
2010-03-22 10:22                         ` Oleg Nesterov
2010-03-22 10:22                       ` Oleg Nesterov
     [not found]                         ` <20100322102247.GA8363-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2010-03-22 23:57                           ` Paul Menage
2010-03-22 23:57                         ` Paul Menage
     [not found]                 ` <20100103190752.GB13423-OM76b2Iv3yLQjUSlxSEPGw@public.gmane.org>
2010-01-05 18:53                   ` Oleg Nesterov
2010-01-03 19:09               ` [RFC] [PATCH 2/2] cgroups: make procs file writable Ben Blum
2010-01-03 19:09                 ` Ben Blum
