All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7][RFC] CGroup hierarchy extensions
@ 2009-03-12 10:51 Paul Menage
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

The following series implements support for:

- named cgroup hierarchies
- cgroup hierarchies with no bound subsystems
- cgroup subsystems that can be bound to multiple hierarchies

These patches aren't yet very well tested (only brief testing running
under UML so far) or well documented, but I wanted to get them sent
out for comment before I went on vacation.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 1/7] [RFC] Support named cgroups hierarchies
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105122.24154.73633.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 2/7] [RFC]Move the cgroup debug subsys into cgroup.c to access internal state Paul Menage
                     ` (6 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Support named cgroups hierarchies

To simplify referring to cgroup hierarchies in mount statements, and
to allow disambiguation in the presence of empty hierarchies and
multiply-bindable subsystems (see later patches in series) this patch
adds support for naming a new cgroup hierarchy via the "name=" mount
option

A pre-existing hierarchy may be specified by either name or by
subsystems; a hierarchy's name cannot be changed by a remount
operation.

Example usage:

# To create a hierarchy called "foo" containing the "cpu" subsystem
mount -t cgroup -oname=foo,cpu cgroup /mnt/cgroup1

# To mount the "foo" hierarchy on a second location
mount -t cgroup -oname=foo cgroup /mnt/cgroup2

Open issues:

- should the specification be via a name= option as in this patch, or
  should we simply use the "device name" as passed to the mount()
  system call?  Using the device name is more conceptually clean and
  consistent with the filesystem API; however, given that the device
  name is currently ignored by cgroups, this would lead to a
  user-visible behaviour change.

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 kernel/cgroup.c |  126 ++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5995477..aa5edc8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -92,6 +92,9 @@ struct cgroupfs_root {
 
 	/* The path to use for release notifications. */
 	char release_agent_path[PATH_MAX];
+
+	/* The name for this hierarchy - may be empty */
+	char name[PATH_MAX];
 };
 
 /*
@@ -826,6 +829,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",noprefix");
 	if (strlen(root->release_agent_path))
 		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+	if (strlen(root->name))
+		seq_printf(seq, ",name=%s", root->name);
 	mutex_unlock(&cgroup_mutex);
 	return 0;
 }
@@ -834,18 +839,22 @@ struct cgroup_sb_opts {
 	unsigned long subsys_bits;
 	unsigned long flags;
 	char *release_agent;
+	char *name;
+	/* A flag indicating that a root was created from this options block */
+	bool created_root;
 };
 
 /* Convert a hierarchy specifier into a bitmask of subsystems and
  * flags. */
 static int parse_cgroupfs_options(char *data,
-				     struct cgroup_sb_opts *opts)
+				  struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data ?: "all";
 
 	opts->subsys_bits = 0;
 	opts->flags = 0;
 	opts->release_agent = NULL;
+	opts->name = NULL;
 
 	while ((token = strsep(&o, ",")) != NULL) {
 		if (!*token)
@@ -870,6 +879,15 @@ static int parse_cgroupfs_options(char *data,
 				return -ENOMEM;
 			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
 			opts->release_agent[PATH_MAX - 1] = 0;
+		} else if (!strncmp(token, "name=", 5)) {
+			/* Specifying two names is forbidden */
+			if (opts->name)
+				return -EINVAL;
+			opts->name = kzalloc(PATH_MAX, GFP_KERNEL);
+			if (!opts->name)
+				return -ENOMEM;
+			strncpy(opts->name, token + 5, PATH_MAX - 1);
+			opts->name[PATH_MAX - 1] = 0;
 		} else {
 			struct cgroup_subsys *ss;
 			int i;
@@ -887,7 +905,7 @@ static int parse_cgroupfs_options(char *data,
 	}
 
 	/* We can't have an empty hierarchy */
-	if (!opts->subsys_bits)
+	if (!opts->subsys_bits && !opts->name)
 		return -EINVAL;
 
 	return 0;
@@ -914,6 +932,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 		goto out_unlock;
 	}
 
+	/* Don't allow name to change at remount */
+	if (opts.name && strcmp(opts.name, root->name)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	ret = rebind_subsystems(root, opts.subsys_bits);
 	if (ret)
 		goto out_unlock;
@@ -925,6 +949,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 		strcpy(root->release_agent_path, opts.release_agent);
  out_unlock:
 	kfree(opts.release_agent);
+	kfree(opts.name);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 	return ret;
@@ -958,28 +983,56 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 
 static int cgroup_test_super(struct super_block *sb, void *data)
 {
-	struct cgroupfs_root *new = data;
+	struct cgroup_sb_opts *new = data;
 	struct cgroupfs_root *root = sb->s_fs_info;
 
-	/* First check subsystems */
-	if (new->subsys_bits != root->subsys_bits)
-	    return 0;
+	/* If we asked for a name then it must match */
+	if (new->name && strcmp(new->name, root->name))
+		return 0;
 
-	/* Next check flags */
-	if (new->flags != root->flags)
+	/* If we asked for subsystems then they must match */
+	if (new->subsys_bits && new->subsys_bits != root->subsys_bits)
 		return 0;
 
 	return 1;
 }
 
+static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
+{
+	struct cgroupfs_root *root;
+
+	if (!opts->subsys_bits)
+		return ERR_PTR(-EINVAL);
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	init_cgroup_root(root);
+	root->subsys_bits = opts->subsys_bits;
+	root->flags = opts->flags;
+	if (opts->release_agent)
+		strcpy(root->release_agent_path, opts->release_agent);
+	if (opts->name)
+		strcpy(root->name, opts->name);
+	opts->created_root = true;
+	return root;
+}
+
 static int cgroup_set_super(struct super_block *sb, void *data)
 {
 	int ret;
-	struct cgroupfs_root *root = data;
+	struct cgroup_sb_opts *opts = data;
+	struct cgroupfs_root *root;
 
+	root = cgroup_root_from_opts(opts);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
 	ret = set_anon_super(sb, NULL);
-	if (ret)
+	if (ret) {
+		kfree(root);
 		return ret;
+	}
 
 	sb->s_fs_info = root;
 	root->sb = sb;
@@ -1018,47 +1071,26 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	struct cgroup_sb_opts opts;
+	struct cgroup_sb_opts opts = { 0 };
 	int ret = 0;
 	struct super_block *sb;
-	struct cgroupfs_root *root;
-	struct list_head tmp_cg_links;
 
 	/* First find the desired set of subsystems */
 	ret = parse_cgroupfs_options(data, &opts);
-	if (ret) {
-		kfree(opts.release_agent);
-		return ret;
-	}
-
-	root = kzalloc(sizeof(*root), GFP_KERNEL);
-	if (!root) {
-		kfree(opts.release_agent);
-		return -ENOMEM;
-	}
-
-	init_cgroup_root(root);
-	root->subsys_bits = opts.subsys_bits;
-	root->flags = opts.flags;
-	if (opts.release_agent) {
-		strcpy(root->release_agent_path, opts.release_agent);
-		kfree(opts.release_agent);
-	}
+	if (ret)
+		goto out_err;
 
-	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
 
 	if (IS_ERR(sb)) {
-		kfree(root);
-		return PTR_ERR(sb);
+		ret = PTR_ERR(sb);
+		goto out_err;
 	}
 
-	if (sb->s_fs_info != root) {
-		/* Reusing an existing superblock */
-		BUG_ON(sb->s_root == NULL);
-		kfree(root);
-		root = NULL;
-	} else {
+	if (opts.created_root) {
 		/* New superblock */
+		struct cgroupfs_root *root = sb->s_fs_info;
+		struct list_head tmp_cg_links;
 		struct cgroup *root_cgrp = &root->top_cgroup;
 		struct inode *inode;
 		int i;
@@ -1091,7 +1123,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		if (ret == -EBUSY) {
 			mutex_unlock(&cgroup_mutex);
 			mutex_unlock(&inode->i_mutex);
-			goto free_cg_links;
+			free_cg_links(&tmp_cg_links);
+			goto drop_new_super;
 		}
 
 		/* EBUSY should be the only error here */
@@ -1130,11 +1163,14 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	simple_set_mnt(mnt, sb);
 	return 0;
 
- free_cg_links:
-	free_cg_links(&tmp_cg_links);
  drop_new_super:
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
+
+ out_err:
+	kfree(opts.release_agent);
+	kfree(opts.name);
+
 	return ret;
 }
 
@@ -2906,6 +2942,9 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 		seq_printf(m, "%lu:", root->subsys_bits);
 		for_each_subsys(root, ss)
 			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+		if (strlen(root->name))
+			seq_printf(m, "%sname=%s",
+				   count++ ? "," : "", root->name);
 		seq_putc(m, ':');
 		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
 		cgrp = task_cgroup(tsk, subsys_id);
@@ -3606,4 +3645,3 @@ css_get_next(struct cgroup_subsys *ss, int id,
 	}
 	return ret;
 }
-

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 2/7] [RFC]Move the cgroup debug subsys into cgroup.c to access internal state
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 1/7] [RFC] Support named cgroups hierarchies Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105127.24154.39200.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup Paul Menage
                     ` (5 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC]Move the cgroup debug subsys into cgroup.c to access internal state

While it's architecturally clean to have the cgroup debug subsystem be
completely independent of the cgroups framework, it limits its
usefulness for debugging the contents of internal data structures.
Move the debug subsystem code into the scope of all the cgroups data
structures to make more detailed debugging possible.

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 kernel/Makefile       |    1 
 kernel/cgroup.c       |   94 +++++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup_debug.c |  107 -------------------------------------------------
 3 files changed, 94 insertions(+), 108 deletions(-)
 delete mode 100644 kernel/cgroup_debug.c

diff --git a/kernel/Makefile b/kernel/Makefile
index 97b5060..bdc528f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa5edc8..ecf00ad 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3645,3 +3645,97 @@ css_get_next(struct cgroup_subsys *ss, int id,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_CGROUP_DEBUG
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+						   struct cgroup *cont)
+{
+	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+	if (!css)
+		return ERR_PTR(-ENOMEM);
+
+	return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	return atomic_read(&cont->count);
+}
+
+static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	u64 count;
+
+	cgroup_lock();
+	count = cgroup_task_count(cont);
+	cgroup_unlock();
+	return count;
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+	return (u64)(long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+					   struct cftype *cft)
+{
+	u64 count;
+
+	rcu_read_lock();
+	count = atomic_read(&current->cgroups->refcount);
+	rcu_read_unlock();
+	return count;
+}
+
+static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+{
+	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+static struct cftype debug_files[] =  {
+	{
+		.name = "cgroup_refcount",
+		.read_u64 = cgroup_refcount_read,
+	},
+	{
+		.name = "taskcount",
+		.read_u64 = taskcount_read,
+	},
+
+	{
+		.name = "current_css_set",
+		.read_u64 = current_css_set_read,
+	},
+
+	{
+		.name = "current_css_set_refcount",
+		.read_u64 = current_css_set_refcount_read,
+	},
+
+	{
+		.name = "releasable",
+		.read_u64 = releasable_read,
+	},
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, debug_files,
+				ARRAY_SIZE(debug_files));
+}
+
+struct cgroup_subsys debug_subsys = {
+	.name = "debug",
+	.create = debug_create,
+	.destroy = debug_destroy,
+	.populate = debug_populate,
+	.subsys_id = debug_subsys_id,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
deleted file mode 100644
index daca620..0000000
--- a/kernel/cgroup_debug.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * kernel/cgroup_debug.c - Example cgroup subsystem that
- * exposes debug info
- *
- * Copyright (C) Google Inc, 2007
- *
- * Developed by Paul Menage (menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org)
- *
- */
-
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/atomic.h>
-
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
-						   struct cgroup *cont)
-{
-	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
-	if (!css)
-		return ERR_PTR(-ENOMEM);
-
-	return css;
-}
-
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-	kfree(cont->subsys[debug_subsys_id]);
-}
-
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
-	return atomic_read(&cont->count);
-}
-
-static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
-{
-	u64 count;
-
-	cgroup_lock();
-	count = cgroup_task_count(cont);
-	cgroup_unlock();
-	return count;
-}
-
-static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
-{
-	return (u64)(long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup *cont,
-					   struct cftype *cft)
-{
-	u64 count;
-
-	rcu_read_lock();
-	count = atomic_read(&current->cgroups->refcount);
-	rcu_read_unlock();
-	return count;
-}
-
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
-{
-	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
-}
-
-static struct cftype files[] =  {
-	{
-		.name = "cgroup_refcount",
-		.read_u64 = cgroup_refcount_read,
-	},
-	{
-		.name = "taskcount",
-		.read_u64 = taskcount_read,
-	},
-
-	{
-		.name = "current_css_set",
-		.read_u64 = current_css_set_read,
-	},
-
-	{
-		.name = "current_css_set_refcount",
-		.read_u64 = current_css_set_refcount_read,
-	},
-
-	{
-		.name = "releasable",
-		.read_u64 = releasable_read,
-	},
-};
-
-static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-struct cgroup_subsys debug_subsys = {
-	.name = "debug",
-	.create = debug_create,
-	.destroy = debug_destroy,
-	.populate = debug_populate,
-	.subsys_id = debug_subsys_id,
-};

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 1/7] [RFC] Support named cgroups hierarchies Paul Menage
  2009-03-12 10:51   ` [PATCH 2/7] [RFC]Move the cgroup debug subsys into cgroup.c to access internal state Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105132.24154.99250.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems Paul Menage
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup

Currently the cgroups code makes the assumption that the subsystem
pointers in a struct css_set uniquely identify the hierarchy->cgroup
mappings associated with the css_set; and there's no way to directly
identify the associated set of cgroups other than by indirecting
through the appropriate subsystem state pointers.

This patch removes the need for that assumption by adding a
back-pointer from struct cg_cgroup_link object to its associated
cgroup; this allows the set of cgroups to be determined by traversing
the cg_links list in the struct css_set.

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 kernel/cgroup.c |  218 +++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 169 insertions(+), 49 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ecf00ad..9cdbbac 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -202,6 +202,7 @@ struct cg_cgroup_link {
 	 * cgroup, anchored on cgroup->css_sets
 	 */
 	struct list_head cgrp_link_list;
+	struct cgroup *cgrp;
 	/*
 	 * List running through cg_cgroup_links pointing at a
 	 * single css_set object, anchored on css_set->cg_links
@@ -228,8 +229,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
 static DEFINE_RWLOCK(css_set_lock);
 static int css_set_count;
 
-/* hash table for cgroup groups. This improves the performance to
- * find an existing css_set */
+/*
+ * hash table for cgroup groups. This improves the performance to find
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
 #define CSS_SET_HASH_BITS	7
 #define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
 static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
@@ -339,6 +343,77 @@ static inline void put_css_set_taskexit(struct css_set *cg)
 }
 
 /*
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cg: candidate css_set being tested
+ * @old_cg: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cg,
+			     struct css_set *old_cg,
+			     struct cgroup *new_cgrp,
+			     struct cgroup_subsys_state *template[])
+{
+	struct list_head *l1, *l2;
+	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+		/* Not all subsystems matched */
+		return false;
+	}
+
+	/*
+	 * Compare cgroup pointers in order to distinguish between
+	 * different cgroups in heirarchies with no subsystems. We
+	 * could get by with just this check alone (and skip the
+	 * memcmp above) but on most setups the memcmp check will
+	 * avoid the need for this more expensive check on almost all
+	 * candidates.
+	 */
+
+	l1 = &cg->cg_links;
+	l2 = &old_cg->cg_links;
+	while (1) {
+		struct cg_cgroup_link *cgl1, *cgl2;
+		struct cgroup *cg1, *cg2;
+
+		l1 = l1->next;
+		l2 = l2->next;
+		/* See if we reached the end - both lists are equal length. */
+		if (l1 == &cg->cg_links) {
+			BUG_ON(l2 != &old_cg->cg_links);
+			break;
+		} else {
+			BUG_ON(l2 == &old_cg->cg_links);
+		}
+		/* Locate the cgroups associated with these links. */
+		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
+		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
+		cg1 = cgl1->cgrp;
+		cg2 = cgl2->cgrp;
+		/* Hierarchies should be linked in the same order. */
+		BUG_ON(cg1->root != cg2->root);
+
+		/*
+		 * If this hierarchy is the hierarchy of the cgroup
+		 * that's changing, then we need to check that this
+		 * css_set points to the new cgroup; if it's any other
+		 * hierarchy, then this css_set should point to the
+		 * same cgroup as the old css_set.
+		 */
+		if (cg1->root == new_cgrp->root) {
+			if (cg1 != new_cgrp)
+				return false;
+		} else {
+			if (cg1 != cg2)
+				return false;
+		}
+	}
+	return true;
+}
+
+/*
  * find_existing_css_set() is a helper for
  * find_css_set(), and checks to see whether an existing
  * css_set is suitable.
@@ -379,10 +454,11 @@ static struct css_set *find_existing_css_set(
 
 	hhead = css_set_hash(template);
 	hlist_for_each_entry(cg, node, hhead, hlist) {
-		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
-			/* All subsystems matched */
-			return cg;
-		}
+		if (!compare_css_sets(cg, oldcg, cgrp, template))
+			continue;
+
+		/* This css_set matches what we need */
+		return cg;
 	}
 
 	/* No existing cgroup group matched */
@@ -436,8 +512,13 @@ static void link_css_set(struct list_head *tmp_cg_links,
 	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
 				cgrp_link_list);
 	link->cg = cg;
+	link->cgrp = cgrp;
 	list_move(&link->cgrp_link_list, &cgrp->css_sets);
-	list_add(&link->cg_link_list, &cg->cg_links);
+	/*
+	 * Always add links to the tail of the list so that the list
+	 * is sorted by order of hierarchy creation
+	 */
+	list_add_tail(&link->cg_link_list, &cg->cg_links);
 }
 
 /*
@@ -457,6 +538,7 @@ static struct css_set *find_css_set(
 	struct list_head tmp_cg_links;
 
 	struct hlist_head *hhead;
+	struct cg_cgroup_link *link, *saved_link;
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
@@ -492,18 +574,15 @@ static struct css_set *find_css_set(
 	/* Add reference counts and links from the new css_set. */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup *cgrp = res->subsys[i]->cgroup;
-		struct cgroup_subsys *ss = subsys[i];
 		atomic_inc(&cgrp->count);
-		/*
-		 * We want to add a link once per cgroup, so we
-		 * only do it for the first subsystem in each
-		 * hierarchy
-		 */
-		if (ss->root->subsys_list.next == &ss->sibling)
-			link_css_set(&tmp_cg_links, res, cgrp);
 	}
-	if (list_empty(&rootnode.subsys_list))
-		link_css_set(&tmp_cg_links, res, dummytop);
+	list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
+				 cg_link_list) {
+		struct cgroup *c = link->cgrp;
+		if (c->root == cgrp->root)
+			c = cgrp;
+		link_css_set(&tmp_cg_links, res, c);
+	}
 
 	BUG_ON(!list_empty(&tmp_cg_links));
 
@@ -519,6 +598,42 @@ static struct css_set *find_css_set(
 }
 
 /*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex held.
+ */
+struct cgroup *task_cgroup_from_root(struct task_struct *task,
+				     struct cgroupfs_root *root)
+{
+	struct css_set *css;
+	struct cgroup *res = NULL;
+
+	BUG_ON(!mutex_is_locked(&cgroup_mutex));
+	read_lock(&css_set_lock);
+	/*
+	 * No need to lock the task - since we hold cgroup_mutex the
+	 * task can't change groups, so the only thing that can happen
+	 * is that it exits and its css is set back to init_css_set.
+	 */
+	css = task->cgroups;
+	if (css == &init_css_set) {
+		res = &root->top_cgroup;
+	} else {
+		struct cg_cgroup_link *link, *saved_link;
+		list_for_each_entry_safe(link, saved_link, &css->cg_links,
+					 cg_link_list) {
+			struct cgroup *c = link->cgrp;
+			if (c->root == root) {
+				res = c;
+				break;
+			}
+		}
+	}
+	read_unlock(&css_set_lock);
+	BUG_ON(!res);
+	return res;
+}
+
+/*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
  * See "The task_lock() exception", at the end of this comment.
@@ -1281,27 +1396,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 	return 0;
 }
 
-/*
- * Return the first subsystem attached to a cgroup's hierarchy, and
- * its subsystem id.
- */
-
-static void get_first_subsys(const struct cgroup *cgrp,
-			struct cgroup_subsys_state **css, int *subsys_id)
-{
-	const struct cgroupfs_root *root = cgrp->root;
-	const struct cgroup_subsys *test_ss;
-	BUG_ON(list_empty(&root->subsys_list));
-	test_ss = list_entry(root->subsys_list.next,
-			     struct cgroup_subsys, sibling);
-	if (css) {
-		*css = cgrp->subsys[test_ss->subsys_id];
-		BUG_ON(!*css);
-	}
-	if (subsys_id)
-		*subsys_id = test_ss->subsys_id;
-}
-
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
@@ -1318,12 +1412,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct css_set *cg;
 	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
-	int subsys_id;
-
-	get_first_subsys(cgrp, NULL, &subsys_id);
 
 	/* Nothing to do if the task is already in that cgroup */
-	oldcgrp = task_cgroup(tsk, subsys_id);
+	oldcgrp = task_cgroup_from_root(tsk, root);
 	if (cgrp == oldcgrp)
 		return 0;
 
@@ -1881,7 +1972,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
  * the start of a css_set
  */
 static void cgroup_advance_iter(struct cgroup *cgrp,
-					  struct cgroup_iter *it)
+				struct cgroup_iter *it)
 {
 	struct list_head *l = it->cg_link;
 	struct cg_cgroup_link *link;
@@ -2829,6 +2920,7 @@ int __init cgroup_init_early(void)
 	init_task.cgroups = &init_css_set;
 
 	init_css_set_link.cg = &init_css_set;
+	init_css_set_link.cgrp = dummytop;
 	list_add(&init_css_set_link.cgrp_link_list,
 		 &rootnode.top_cgroup.css_sets);
 	list_add(&init_css_set_link.cg_link_list,
@@ -2936,7 +3028,6 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 	for_each_active_root(root) {
 		struct cgroup_subsys *ss;
 		struct cgroup *cgrp;
-		int subsys_id;
 		int count = 0;
 
 		seq_printf(m, "%lu:", root->subsys_bits);
@@ -2946,8 +3037,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 			seq_printf(m, "%sname=%s",
 				   count++ ? "," : "", root->name);
 		seq_putc(m, ':');
-		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
-		cgrp = task_cgroup(tsk, subsys_id);
+		cgrp = task_cgroup_from_root(tsk, root);
 		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
 		if (retval < 0)
 			goto out_unlock;
@@ -3273,13 +3363,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 {
 	int ret;
 	struct cgroup *target;
-	int subsys_id;
 
 	if (cgrp == dummytop)
 		return 1;
 
-	get_first_subsys(cgrp, NULL, &subsys_id);
-	target = task_cgroup(task, subsys_id);
+	target = task_cgroup_from_root(task, cgrp->root);
 	while (cgrp != target && cgrp!= cgrp->top_cgroup)
 		cgrp = cgrp->parent;
 	ret = (cgrp == target);
@@ -3694,6 +3782,33 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
 	return count;
 }
 
+static int current_css_set_cg_links_read(struct cgroup *cont,
+					 struct cftype *cft,
+					 struct seq_file *seq)
+{
+	struct cg_cgroup_link *link, *saved_link;
+	struct css_set *cg;
+	write_lock_irq(&css_set_lock);
+	task_lock(current);
+	cg = current->cgroups;
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
+		struct cgroup *c = link->cgrp;
+		const char *name;
+		rcu_read_lock();
+		if (c->dentry)
+			name = c->dentry->d_name.name;
+		else
+			name = "?";
+		seq_printf(seq, "Root %lu group %s\n",
+			   c->root->subsys_bits, name);
+		rcu_read_unlock();
+	}
+	task_unlock(current);
+	write_unlock_irq(&css_set_lock);
+	return 0;
+}
+
 static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
 {
 	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
@@ -3720,6 +3835,11 @@ static struct cftype debug_files[] =  {
 	},
 
 	{
+		.name = "current_css_set_cg_links",
+		.read_seq_string = current_css_set_cg_links_read,
+	},
+
+	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
 	},

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
                     ` (2 preceding siblings ...)
  2009-03-12 10:51   ` [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105137.24154.34890.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 5/7] [RFC] Remove cgroup_subsys.root pointer Paul Menage
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Allow cgroup hierarchies to be created with no bound subsystems

This patch removes the restriction that a cgroup hierarchy must have
at least one bound subsystem. The mount option "none" is treated as an
explicit request for no bound subsystems.

A hierarchy with no subsystems can be useful for plan task tracking,
and is also a step towards the support for multiply-bindable
subsystems in a later patch in this series.

As part of this change, the hierarchy id is no longer calculated from
the bitmask of subsystems in the hierarchy (since this is not
guaranteed to be unique) but is allocated via an ida. Reference counts
on cgroups from css_set objects are now taken explicitly one per
hierarchy, rather than one per subsystem.

Example usage:

mount -t cgroup -o none,name=foo cgroup /mnt/cgroup

Based on the "no-op"/"none" subsystem concept proposed by
kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 kernel/cgroup.c |  158 ++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 104 insertions(+), 54 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9cdbbac..4a7ef2c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -72,6 +72,9 @@ struct cgroupfs_root {
 	 */
 	unsigned long subsys_bits;
 
+	/* Unique id for this hierarchy. */
+	int hierarchy_id;
+
 	/* The bitmask of subsystems currently attached to this hierarchy */
 	unsigned long actual_subsys_bits;
 
@@ -142,6 +145,10 @@ struct css_id {
 static LIST_HEAD(roots);
 static int root_count;
 
+static DEFINE_IDA(hierarchy_ida);
+static int next_hierarchy_id;
+static DEFINE_SPINLOCK(hierarchy_id_lock);
+
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)
 
@@ -259,42 +266,10 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
  * compiled into their kernel but not actually in use */
 static int use_task_css_set_links __read_mostly;
 
-/* When we create or destroy a css_set, the operation simply
- * takes/releases a reference count on all the cgroups referenced
- * by subsystems in this css_set. This can end up multiple-counting
- * some cgroups, but that's OK - the ref-count is just a
- * busy/not-busy indicator; ensuring that we only count each cgroup
- * once would require taking a global lock to ensure that no
- * subsystems moved between hierarchies while we were doing so.
- *
- * Possible TODO: decide at boot time based on the number of
- * registered subsystems and the number of CPUs or NUMA nodes whether
- * it's better for performance to ref-count every subsystem, or to
- * take a global lock and only add one ref count to each hierarchy.
- */
-
-/*
- * unlink a css_set from the list and free it
- */
-static void unlink_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
-
-	hlist_del(&cg->hlist);
-	css_set_count--;
-
-	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-				 cg_link_list) {
-		list_del(&link->cg_link_list);
-		list_del(&link->cgrp_link_list);
-		kfree(link);
-	}
-}
-
-static void __put_css_set(struct css_set *cg, int taskexit)
-{
-	int i;
 	/*
 	 * Ensure that the refcount doesn't hit zero while any readers
 	 * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -307,20 +282,27 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 		write_unlock(&css_set_lock);
 		return;
 	}
-	unlink_css_set(cg);
-	write_unlock(&css_set_lock);
 
-	rcu_read_lock();
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
+	/* This css_set is dead. unlink it and release cgroup refcounts */
+	hlist_del(&cg->hlist);
+	css_set_count--;
+
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
+		struct cgroup *cgrp = link->cgrp;
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
 		if (atomic_dec_and_test(&cgrp->count) &&
 		    notify_on_release(cgrp)) {
 			if (taskexit)
 				set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
 		}
+
+		kfree(link);
 	}
-	rcu_read_unlock();
+
+	write_unlock(&css_set_lock);
 	kfree(cg);
 }
 
@@ -513,6 +495,7 @@ static void link_css_set(struct list_head *tmp_cg_links,
 				cgrp_link_list);
 	link->cg = cg;
 	link->cgrp = cgrp;
+	atomic_inc(&cgrp->count);
 	list_move(&link->cgrp_link_list, &cgrp->css_sets);
 	/*
 	 * Always add links to the tail of the list so that the list
@@ -533,7 +516,6 @@ static struct css_set *find_css_set(
 {
 	struct css_set *res;
 	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-	int i;
 
 	struct list_head tmp_cg_links;
 
@@ -572,10 +554,6 @@ static struct css_set *find_css_set(
 
 	write_lock(&css_set_lock);
 	/* Add reference counts and links from the new css_set. */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup *cgrp = res->subsys[i]->cgroup;
-		atomic_inc(&cgrp->count);
-	}
 	list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
 				 cg_link_list) {
 		struct cgroup *c = link->cgrp;
@@ -955,6 +933,10 @@ struct cgroup_sb_opts {
 	unsigned long flags;
 	char *release_agent;
 	char *name;
+
+	/* User explicitly requested empty subsystem */
+	bool none;
+
 	/* A flag indicating that a root was created from this options block */
 	bool created_root;
 };
@@ -983,6 +965,9 @@ static int parse_cgroupfs_options(char *data,
 				if (!ss->disabled)
 					opts->subsys_bits |= 1ul << i;
 			}
+		} else if (!strcmp(token, "none")) {
+			/* Explicitly have no subsystems */
+			opts->none = true;
 		} else if (!strcmp(token, "noprefix")) {
 			set_bit(ROOT_NOPREFIX, &opts->flags);
 		} else if (!strncmp(token, "release_agent=", 14)) {
@@ -1019,7 +1004,16 @@ static int parse_cgroupfs_options(char *data,
 		}
 	}
 
-	/* We can't have an empty hierarchy */
+	/* Consistency checks */
+
+	/* Can't specify "none" and some subsystems */
+	if (opts->subsys_bits && opts->none)
+		return -EINVAL;
+
+	/*
+	 * We either have to specify by name or by subsystems. (So all
+	 * empty hierarchies must have a name).
+	 */
 	if (!opts->subsys_bits && !opts->name)
 		return -EINVAL;
 
@@ -1085,6 +1079,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->release_list);
 	init_rwsem(&cgrp->pids_mutex);
 }
+
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
@@ -1096,6 +1091,18 @@ static void init_cgroup_root(struct cgroupfs_root *root)
 	init_cgroup_housekeeping(cgrp);
 }
 
+static void init_root_id(struct cgroupfs_root *root)
+{
+	BUG_ON(!ida_pre_get(&hierarchy_ida, GFP_KERNEL));
+	spin_lock(&hierarchy_id_lock);
+	if (ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
+			      &root->hierarchy_id)) {
+		BUG_ON(ida_get_new(&hierarchy_ida, &root->hierarchy_id));
+	}
+	next_hierarchy_id = root->hierarchy_id + 1;
+	spin_unlock(&hierarchy_id_lock);
+}
+
 static int cgroup_test_super(struct super_block *sb, void *data)
 {
 	struct cgroup_sb_opts *new = data;
@@ -1116,7 +1123,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 {
 	struct cgroupfs_root *root;
 
-	if (!opts->subsys_bits)
+	if (!opts->subsys_bits && !opts->none)
 		return ERR_PTR(-EINVAL);
 
 	root = kzalloc(sizeof(*root), GFP_KERNEL);
@@ -1124,6 +1131,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 		return ERR_PTR(-ENOMEM);
 
 	init_cgroup_root(root);
+	init_root_id(root);
+
 	root->subsys_bits = opts->subsys_bits;
 	root->flags = opts->flags;
 	if (opts->release_agent)
@@ -1134,6 +1143,15 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 	return root;
 }
 
+static void cgroup_drop_root(struct cgroupfs_root *root)
+{
+	BUG_ON(!root->hierarchy_id);
+	spin_lock(&hierarchy_id_lock);
+	ida_remove(&hierarchy_ida, root->hierarchy_id);
+	spin_unlock(&hierarchy_id_lock);
+	kfree(root);
+}
+
 static int cgroup_set_super(struct super_block *sb, void *data)
 {
 	int ret;
@@ -1145,7 +1163,7 @@ static int cgroup_set_super(struct super_block *sb, void *data)
 		return PTR_ERR(root);
 	ret = set_anon_super(sb, NULL);
 	if (ret) {
-		kfree(root);
+		cgroup_drop_root(root);
 		return ret;
 	}
 
@@ -1331,7 +1349,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	mutex_unlock(&cgroup_mutex);
 
 	kill_litter_super(sb);
-	kfree(root);
+	cgroup_drop_root(root);
 }
 
 static struct file_system_type cgroup_fs_type = {
@@ -2975,7 +2993,7 @@ int __init cgroup_init(void)
 	/* Add init_css_set to the hash table */
 	hhead = css_set_hash(init_css_set.subsys);
 	hlist_add_head(&init_css_set.hlist, hhead);
-
+	init_root_id(&rootnode);
 	err = register_filesystem(&cgroup_fs_type);
 	if (err < 0)
 		goto out;
@@ -3030,7 +3048,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 		struct cgroup *cgrp;
 		int count = 0;
 
-		seq_printf(m, "%lu:", root->subsys_bits);
+		seq_printf(m, "%d:", root->hierarchy_id);
 		for_each_subsys(root, ss)
 			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
 		if (strlen(root->name))
@@ -3076,8 +3094,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
 	mutex_lock(&cgroup_mutex);
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
-		seq_printf(m, "%s\t%lu\t%d\t%d\n",
-			   ss->name, ss->root->subsys_bits,
+		seq_printf(m, "%s\t%d\t%d\t%d\n",
+			   ss->name, ss->root->hierarchy_id,
 			   ss->root->number_of_cgroups, !ss->disabled);
 	}
 	mutex_unlock(&cgroup_mutex);
@@ -3800,8 +3818,8 @@ static int current_css_set_cg_links_read(struct cgroup *cont,
 			name = c->dentry->d_name.name;
 		else
 			name = "?";
-		seq_printf(seq, "Root %lu group %s\n",
-			   c->root->subsys_bits, name);
+		seq_printf(seq, "Root %d group %s\n",
+			   c->root->hierarchy_id, name);
 		rcu_read_unlock();
 	}
 	task_unlock(current);
@@ -3809,6 +3827,33 @@ static int current_css_set_cg_links_read(struct cgroup *cont,
 	return 0;
 }
 
+#define MAX_TASKS_SHOWN_PER_CSS 25
+static int cgroup_css_links_read(struct cgroup *cont,
+				 struct cftype *cft,
+				 struct seq_file *seq)
+{
+	struct cg_cgroup_link *link, *saved_link;
+	write_lock_irq(&css_set_lock);
+	list_for_each_entry_safe(link, saved_link, &cont->css_sets,
+				 cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		struct task_struct *task, *saved_task;
+		int count = 0;
+		seq_printf(seq, "css_set %p\n", cg);
+		list_for_each_entry_safe(task, saved_task, &cg->tasks,
+					 cg_list) {
+			if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
+				seq_puts(seq, "  ...\n");
+				break;
+			} else {
+				seq_printf(seq, "  task %d\n", task->pid);
+			}
+		}
+	}
+	write_unlock_irq(&css_set_lock);
+	return 0;
+}
+
 static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
 {
 	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
@@ -3840,6 +3885,11 @@ static struct cftype debug_files[] =  {
 	},
 
 	{
+		.name = "cgroup_css_links",
+		.read_seq_string = cgroup_css_links_read,
+	},
+
+	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
 	},

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 5/7] [RFC] Remove cgroup_subsys.root pointer
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
                     ` (3 preceding siblings ...)
  2009-03-12 10:51   ` [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
  2009-03-12 10:51   ` [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems Paul Menage
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Remove cgroup_subsys.root pointer

In preparation for supporting cgroup subsystems that can be bound to
multiple hierarchies, remove the "root" pointer and associated list
structures.  Subsystem hierarchy membership is now determined entirely
through the subsystem bitmasks in struct cgroupfs_root.

Minor changes include:
  - root_list now includes the inactive root
  - for_each_active_root() -> for_each_root()
  - for_each_subsys() is now guaranteed to be in subsys_id order


Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 include/linux/cgroup.h |   16 ++----
 kernel/cgroup.c        |  128 +++++++++++++++++++++++++++---------------------
 2 files changed, 77 insertions(+), 67 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 665fa70..adf6739 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -227,7 +227,9 @@ struct css_set {
 	/*
 	 * Set of subsystem states, one for each subsystem. This array
 	 * is immutable after creation apart from the init_css_set
-	 * during subsystem registration (at boot time).
+	 * during subsystem registration (at boot
+	 * time). Multi-subsystems don't have an entry in here since
+	 * there's no unique state for a given task.
 	 */
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 };
@@ -401,9 +403,9 @@ struct cgroup_subsys {
 	/*
 	 * Protects sibling/children links of cgroups in this
 	 * hierarchy, plus protects which hierarchy (or none) the
-	 * subsystem is a part of (i.e. root/sibling).  To avoid
-	 * potential deadlocks, the following operations should not be
-	 * undertaken while holding any hierarchy_mutex:
+	 * subsystem is a part of.  To avoid potential deadlocks, the
+	 * following operations should not be undertaken while holding
+	 * any hierarchy_mutex:
 	 *
 	 * - allocating memory
 	 * - initiating hotplug events
@@ -411,12 +413,6 @@ struct cgroup_subsys {
 	struct mutex hierarchy_mutex;
 	struct lock_class_key subsys_key;
 
-	/*
-	 * Link to parent, and list entry in parent's children.
-	 * Protected by this->hierarchy_mutex and cgroup_lock()
-	 */
-	struct cgroupfs_root *root;
-	struct list_head sibling;
 	/* used when use_id == true */
 	struct idr idr;
 	spinlock_t id_lock;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4a7ef2c..fcb8181 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -66,28 +66,19 @@ static struct cgroup_subsys *subsys[] = {
 struct cgroupfs_root {
 	struct super_block *sb;
 
-	/*
-	 * The bitmask of subsystems intended to be attached to this
-	 * hierarchy
-	 */
+	/* The bitmask of subsystems attached to this hierarchy */
 	unsigned long subsys_bits;
 
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
 
-	/* The bitmask of subsystems currently attached to this hierarchy */
-	unsigned long actual_subsys_bits;
-
-	/* A list running through the attached subsystems */
-	struct list_head subsys_list;
-
 	/* The root cgroup for this hierarchy */
 	struct cgroup top_cgroup;
 
 	/* Tracks how many cgroups are currently defined in hierarchy.*/
 	int number_of_cgroups;
 
-	/* A list running through the active hierarchies */
+	/* A list running through all hierarchies */
 	struct list_head root_list;
 
 	/* Hierarchy-specific flags */
@@ -187,13 +178,36 @@ static int notify_on_release(const struct cgroup *cgrp)
  * for_each_subsys() allows you to iterate on each subsystem attached to
  * an active hierarchy
  */
+static inline struct cgroup_subsys *nth_ss(int n)
+{
+	return (n >= CGROUP_SUBSYS_COUNT) ? NULL : subsys[n];
+}
 #define for_each_subsys(_root, _ss) \
-list_for_each_entry(_ss, &_root->subsys_list, sibling)
+for (_ss = nth_ss(find_first_bit(&(_root)->subsys_bits, CGROUP_SUBSYS_COUNT));\
+	    _ss != NULL;						      \
+    _ss = nth_ss(find_next_bit(&(_root)->subsys_bits, CGROUP_SUBSYS_COUNT,    \
+			       _ss->subsys_id + 1)))
 
-/* for_each_active_root() allows you to iterate across the active hierarchies */
-#define for_each_active_root(_root) \
+
+/* for_each_root() allows you to iterate across all hierarchies */
+#define for_each_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
+/* Find the root for a given subsystem */
+static struct cgroupfs_root *find_root(struct cgroup_subsys *ss)
+{
+	int id = ss->subsys_id;
+	struct cgroupfs_root *root, *res = NULL;
+	for_each_root(root) {
+		if (root->subsys_bits && (1UL << id)) {
+			BUG_ON(res);
+			res = root;
+		}
+	}
+	BUG_ON(!res);
+	return res;
+}
+
 /* the list of cgroups eligible for automatic release. Protected by
  * release_list_lock */
 static LIST_HEAD(release_list);
@@ -419,7 +433,7 @@ static struct css_set *find_existing_css_set(
 	struct hlist_node *node;
 	struct css_set *cg;
 
-	/* Built the set of subsystem state objects that we want to
+	/* Build the set of subsystem state objects that we want to
 	 * see in the new css_set */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		if (root->subsys_bits & (1UL << i)) {
@@ -840,21 +854,20 @@ static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
 }
 
 static int rebind_subsystems(struct cgroupfs_root *root,
-			      unsigned long final_bits)
+			     unsigned long final_bits)
 {
 	unsigned long added_bits, removed_bits;
 	struct cgroup *cgrp = &root->top_cgroup;
 	int i;
 
-	removed_bits = root->actual_subsys_bits & ~final_bits;
-	added_bits = final_bits & ~root->actual_subsys_bits;
+	removed_bits = root->subsys_bits & ~final_bits;
+	added_bits = final_bits & ~root->subsys_bits;
 	/* Check that any added subsystems are currently free */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		unsigned long bit = 1UL << i;
-		struct cgroup_subsys *ss = subsys[i];
 		if (!(bit & added_bits))
 			continue;
-		if (ss->root != &rootnode) {
+		if (!(rootnode.subsys_bits & bit)) {
 			/* Subsystem isn't free */
 			return -EBUSY;
 		}
@@ -876,25 +889,27 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i]);
 			BUG_ON(!dummytop->subsys[i]);
 			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+			BUG_ON(!(rootnode.subsys_bits & bit));
 			mutex_lock(&ss->hierarchy_mutex);
 			cgrp->subsys[i] = dummytop->subsys[i];
 			cgrp->subsys[i]->cgroup = cgrp;
-			list_move(&ss->sibling, &root->subsys_list);
-			ss->root = root;
 			if (ss->bind)
 				ss->bind(ss, cgrp);
+			rootnode.subsys_bits &= ~bit;
+			root->subsys_bits |= bit;
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+			BUG_ON(rootnode.subsys_bits & bit);
 			mutex_lock(&ss->hierarchy_mutex);
 			if (ss->bind)
 				ss->bind(ss, dummytop);
 			dummytop->subsys[i]->cgroup = dummytop;
 			cgrp->subsys[i] = NULL;
-			subsys[i]->root = &rootnode;
-			list_move(&ss->sibling, &rootnode.subsys_list);
+			root->subsys_bits &= ~bit;
+			rootnode.subsys_bits |= bit;
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
@@ -904,7 +919,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			BUG_ON(cgrp->subsys[i]);
 		}
 	}
-	root->subsys_bits = root->actual_subsys_bits = final_bits;
 	synchronize_rcu();
 
 	return 0;
@@ -1083,7 +1097,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
 	struct cgroup *cgrp = &root->top_cgroup;
-	INIT_LIST_HEAD(&root->subsys_list);
 	INIT_LIST_HEAD(&root->root_list);
 	root->number_of_cgroups = 1;
 	cgrp->root = root;
@@ -1133,7 +1146,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
 	init_cgroup_root(root);
 	init_root_id(root);
 
-	root->subsys_bits = opts->subsys_bits;
 	root->flags = opts->flags;
 	if (opts->release_agent)
 		strcpy(root->release_agent_path, opts->release_agent);
@@ -1252,7 +1264,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 			goto drop_new_super;
 		}
 
-		ret = rebind_subsystems(root, root->subsys_bits);
+		ret = rebind_subsystems(root, opts.subsys_bits);
 		if (ret == -EBUSY) {
 			mutex_unlock(&cgroup_mutex);
 			mutex_unlock(&inode->i_mutex);
@@ -2596,24 +2608,16 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 {
 	/* We need to take each hierarchy_mutex in a consistent order */
-	int i;
-
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup_subsys *ss = subsys[i];
-		if (ss->root == root)
-			mutex_lock(&ss->hierarchy_mutex);
-	}
+	struct cgroup_subsys *ss;
+	for_each_subsys(root, ss)
+		mutex_lock(&ss->hierarchy_mutex);
 }
 
 static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
 {
-	int i;
-
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup_subsys *ss = subsys[i];
-		if (ss->root == root)
-			mutex_unlock(&ss->hierarchy_mutex);
-	}
+	struct cgroup_subsys *ss;
+	for_each_subsys(root, ss)
+		mutex_unlock(&ss->hierarchy_mutex);
 }
 
 /*
@@ -2730,13 +2734,9 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
 	 * we can be called via check_for_release() with no
 	 * synchronization other than RCU, and the subsystem linked
 	 * list isn't RCU-safe */
-	int i;
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		struct cgroup_subsys *ss = subsys[i];
+	struct cgroup_subsys *ss;
+	for_each_subsys(cgrp->root, ss) {
 		struct cgroup_subsys_state *css;
-		/* Skip subsystems not in this hierarchy */
-		if (ss->root != cgrp->root)
-			continue;
 		css = cgrp->subsys[ss->subsys_id];
 		/* When called from check_for_release() it's possible
 		 * that by this point the cgroup has been removed
@@ -2894,8 +2894,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
 	/* Create the top cgroup state for this subsystem */
-	list_add(&ss->sibling, &rootnode.subsys_list);
-	ss->root = &rootnode;
 	css = ss->create(ss, dummytop);
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
@@ -2917,6 +2915,8 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	mutex_init(&ss->hierarchy_mutex);
 	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
+
+	rootnode.subsys_bits |= 1ULL << ss->subsys_id;
 }
 
 /**
@@ -2934,6 +2934,7 @@ int __init cgroup_init_early(void)
 	INIT_HLIST_NODE(&init_css_set.hlist);
 	css_set_count = 1;
 	init_cgroup_root(&rootnode);
+	list_add(&rootnode.root_list, &roots);
 	root_count = 1;
 	init_task.cgroups = &init_css_set;
 
@@ -3043,11 +3044,14 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
 
 	mutex_lock(&cgroup_mutex);
 
-	for_each_active_root(root) {
+	for_each_root(root) {
 		struct cgroup_subsys *ss;
 		struct cgroup *cgrp;
 		int count = 0;
 
+		if (root == &rootnode)
+			continue;
+
 		seq_printf(m, "%d:", root->hierarchy_id);
 		for_each_subsys(root, ss)
 			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -3086,17 +3090,27 @@ struct file_operations proc_cgroup_operations = {
 };
 
 /* Display information about each subsystem and each hierarchy */
+static void proc_show_subsys(struct seq_file *m, struct cgroupfs_root *root,
+			     struct cgroup_subsys *ss)
+{
+	seq_printf(m, "%s\t%d\t%d\t%d\n",
+		   ss->name, root->hierarchy_id,
+		   root->number_of_cgroups, !ss->disabled);
+}
+
 static int proc_cgroupstats_show(struct seq_file *m, void *v)
 {
 	int i;
-
 	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
 	mutex_lock(&cgroup_mutex);
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
-		seq_printf(m, "%s\t%d\t%d\t%d\n",
-			   ss->name, ss->root->hierarchy_id,
-			   ss->root->number_of_cgroups, !ss->disabled);
+		unsigned long bit = 1ULL << ss->subsys_id;
+		struct cgroupfs_root *root;
+		for_each_root(root) {
+			if (root->subsys_bits & bit)
+				proc_show_subsys(m, root, ss);
+		}
 	}
 	mutex_unlock(&cgroup_mutex);
 	return 0;
@@ -3276,7 +3290,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 	 * with, and pin them so we can drop cgroup_mutex */
 	mutex_lock(&cgroup_mutex);
  again:
-	root = subsys->root;
+	root = find_root(subsys);
 	if (root == &rootnode) {
 		mutex_unlock(&cgroup_mutex);
 		return 0;
@@ -3328,7 +3342,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 	 * that we're still in the same state that we thought we
 	 * were. */
 	mutex_lock(&cgroup_mutex);
-	if ((root != subsys->root) ||
+	if ((root != find_root(subsys)) ||
 	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
 		/* Aargh, we raced ... */
 		mutex_unlock(&inode->i_mutex);

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
                     ` (4 preceding siblings ...)
  2009-03-12 10:51   ` [PATCH 5/7] [RFC] Remove cgroup_subsys.root pointer Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105147.24154.62638.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-12 10:51   ` [PATCH 7/7] [RFC] Example multi-bindable subsystem: a per-cgroup notes field Paul Menage
  2009-03-16  1:10   ` [PATCH 0/7][RFC] CGroup hierarchy extensions KAMEZAWA Hiroyuki
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Support multiply-bindable cgroup subsystems

This patch allows a cgroup subsystem to be marked as bindable on
multiple cgroup hierarchies independently, when declared in
cgroup_subsys.h via MULTI_SUBSYS() rather than SUBSYS().

The state for such subsystems cannot be accessed directly from a
task->cgroups (since there's no unique mapping for a task) but instead
must be accessed via a particular control group object.

Multiply-bound subsystems are useful in cases where there's no direct
correspondence between the cgroup configuration and some property of
the kernel outside of the cgroups subsystem.  So this would not be
applicable to e.g. the CFS cgroup, since there has to a unique mapping
from a task to its CFS run queue.

As an example, the "debug" subsystem is marked multiply-bindable,
since it has no state outside the cgroups framework itself.

Example usage:

mount -t cgroup -o name=foo,debug,cpu cgroup /mnt1
mount -t cgroup -o name=bar,debug,memory cgroup /mnt2

Open issues:

- what should /proc/cgroup show for multiply-bindable subsystems?
  Currently it shows just one entry for each hierarchy the subsystem
  is bound to, which means the subsystem doesn't show up when unbound,
  and is indistinguishable from a regular (singleton) subsystem when
  singly-bound.

- should the bind() callback be extended to support passing more
  information to multiply-bindable subsystems? Or should we just scrap
  the bind() callback since nothing appears to be using it.

- how to stop checkpatch.pl complaining about the way the
  SUBSYS/MULTI_SUBSYS macros as used to define the cgroup_subsys_id
  enum in cgroup.h?

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 include/linux/cgroup.h        |   37 ++++++++++-
 include/linux/cgroup_subsys.h |    2 -
 kernel/cgroup.c               |  137 ++++++++++++++++++++++++++---------------
 3 files changed, 123 insertions(+), 53 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index adf6739..9b5999d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -39,10 +39,37 @@ extern int cgroupstats_build(struct cgroupstats *stats,
 
 extern struct file_operations proc_cgroup_operations;
 
-/* Define the enumeration of all cgroup subsystems */
-#define SUBSYS(_x) _x ## _subsys_id,
+/* Define the enumeration of all cgroup subsystems. There are two
+ * kinds of subsystems:
+ *
+ * - regular (singleton) subsystems can be only mounted once; these
+ * generally correspond to some system that has substantial state
+ * outside of the cgroups framework, or where the state is being used
+ * to control some external behaviour (e.g. CPU scheduler).  Every
+ * task has an associated state pointer (accessed efficiently through
+ * task->cgroups) for each singleton subsystem.
+ *
+ * - multiply-bound subsystems may be mounted on zero or more
+ * hierarchies. Since there's no unique mapping from a task to a
+ * subsystem state pointer for multiply-bound subsystems, the state of
+ * these subsystems cannot be accessed rapidly from a task
+ * pointer. These correspond to subsystems where most or all of the
+ * state is maintained within the subsystem itself, and/or the
+ * subsystem is used for monitoring rather than control.
+ */
 enum cgroup_subsys_id {
+#define SUBSYS(_x) _x ## _subsys_id,
+#define MULTI_SUBSYS(_x)
 #include <linux/cgroup_subsys.h>
+#undef SUBSYS
+#undef MULTI_SUBSYS
+	CGROUP_SINGLETON_SUBSYS_COUNT,
+	CGROUP_DUMMY_ENUM_RESET = CGROUP_SINGLETON_SUBSYS_COUNT - 1,
+#define SUBSYS(_x)
+#define MULTI_SUBSYS(_x) _x ## _subsys_id,
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+#undef MULTI_SUBSYS
 	CGROUP_SUBSYS_COUNT
 };
 #undef SUBSYS
@@ -231,7 +258,7 @@ struct css_set {
 	 * time). Multi-subsystems don't have an entry in here since
 	 * there's no unique state for a given task.
 	 */
-	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+	struct cgroup_subsys_state *subsys[CGROUP_SINGLETON_SUBSYS_COUNT];
 };
 
 /*
@@ -419,8 +446,10 @@ struct cgroup_subsys {
 };
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
+#define MULTI_SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
 #include <linux/cgroup_subsys.h>
 #undef SUBSYS
+#undef MULTI_SUBSYS
 
 static inline struct cgroup_subsys_state *cgroup_subsys_state(
 	struct cgroup *cgrp, int subsys_id)
@@ -431,6 +460,8 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
 static inline struct cgroup_subsys_state *task_subsys_state(
 	struct task_struct *task, int subsys_id)
 {
+	/* This check is optimized out for most callers */
+	BUG_ON(subsys_id >= CGROUP_SINGLETON_SUBSYS_COUNT);
 	return rcu_dereference(task->cgroups->subsys[subsys_id]);
 }
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c8d31b..f78605e 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -14,7 +14,7 @@ SUBSYS(cpuset)
 /* */
 
 #ifdef CONFIG_CGROUP_DEBUG
-SUBSYS(debug)
+MULTI_SUBSYS(debug)
 #endif
 
 /* */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fcb8181..942a951 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,10 +52,18 @@
 static DEFINE_MUTEX(cgroup_mutex);
 
 /* Generate an array of cgroup subsystem pointers */
-#define SUBSYS(_x) &_x ## _subsys,
 
 static struct cgroup_subsys *subsys[] = {
+#define SUBSYS(_x) &_x ## _subsys,
+#define MULTI_SUBSYS(_x)
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+#undef MULTI_SUBSYS
+#define SUBSYS(_x)
+#define MULTI_SUBSYS(_x) &_x ## _subsys,
 #include <linux/cgroup_subsys.h>
+#undef SUBSYS
+#undef MULTI_SUBSYS
 };
 
 /*
@@ -265,7 +273,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	int index;
 	unsigned long tmp = 0UL;
 
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++)
 		tmp += (unsigned long)css[i];
 	tmp = (tmp >> 16) ^ tmp;
 
@@ -435,7 +443,7 @@ static struct css_set *find_existing_css_set(
 
 	/* Build the set of subsystem state objects that we want to
 	 * see in the new css_set */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {
 		if (root->subsys_bits & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
 			 * the subsystem state from the new
@@ -529,7 +537,7 @@ static struct css_set *find_css_set(
 	struct css_set *oldcg, struct cgroup *cgrp)
 {
 	struct css_set *res;
-	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+	struct cgroup_subsys_state *template[CGROUP_SINGLETON_SUBSYS_COUNT];
 
 	struct list_head tmp_cg_links;
 
@@ -853,6 +861,20 @@ static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
 		wake_up_all(&cgroup_rmdir_waitq);
 }
 
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+			       struct cgroup_subsys *ss,
+			       struct cgroup *cgrp)
+{
+	css->cgroup = cgrp;
+	atomic_set(&css->refcnt, 1);
+	css->flags = 0;
+	css->id = NULL;
+	if (cgrp == &cgrp->root->top_cgroup)
+		set_bit(CSS_ROOT, &css->flags);
+	BUG_ON(cgrp->subsys[ss->subsys_id]);
+	cgrp->subsys[ss->subsys_id] = css;
+}
+
 static int rebind_subsystems(struct cgroupfs_root *root,
 			     unsigned long final_bits)
 {
@@ -863,7 +885,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	removed_bits = root->subsys_bits & ~final_bits;
 	added_bits = final_bits & ~root->subsys_bits;
 	/* Check that any added subsystems are currently free */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {
 		unsigned long bit = 1UL << i;
 		if (!(bit & added_bits))
 			continue;
@@ -883,33 +905,57 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	/* Process each subsystem */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		bool singleton = i < CGROUP_SINGLETON_SUBSYS_COUNT;
 		unsigned long bit = 1UL << i;
 		if (bit & added_bits) {
+			struct cgroup_subsys_state *css;
 			/* We're binding this subsystem to this hierarchy */
 			BUG_ON(cgrp->subsys[i]);
-			BUG_ON(!dummytop->subsys[i]);
-			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
-			BUG_ON(!(rootnode.subsys_bits & bit));
+			if (singleton) {
+				css = dummytop->subsys[i];
+				BUG_ON(!css);
+
+				BUG_ON(css->cgroup != dummytop);
+				BUG_ON(!(rootnode.subsys_bits & bit));
+			} else {
+				BUG_ON(dummytop->subsys[i]);
+				BUG_ON(rootnode.subsys_bits & bit);
+				css = ss->create(ss, cgrp);
+				if (IS_ERR(css))
+					return PTR_ERR(css);
+				init_cgroup_css(css, ss, cgrp);
+			}
 			mutex_lock(&ss->hierarchy_mutex);
-			cgrp->subsys[i] = dummytop->subsys[i];
-			cgrp->subsys[i]->cgroup = cgrp;
+			cgrp->subsys[i] = css;
+			css->cgroup = cgrp;
 			if (ss->bind)
 				ss->bind(ss, cgrp);
-			rootnode.subsys_bits &= ~bit;
+			if (singleton)
+				rootnode.subsys_bits &= ~bit;
 			root->subsys_bits |= bit;
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & removed_bits) {
+			struct cgroup_subsys_state *css;
 			/* We're removing this subsystem */
-			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
-			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
-			BUG_ON(rootnode.subsys_bits & bit);
+			css = cgrp->subsys[i];
+			BUG_ON(css->cgroup != cgrp);
+			if (singleton) {
+				BUG_ON(css != dummytop->subsys[i]);
+				BUG_ON(rootnode.subsys_bits & bit);
+			}
 			mutex_lock(&ss->hierarchy_mutex);
 			if (ss->bind)
 				ss->bind(ss, dummytop);
-			dummytop->subsys[i]->cgroup = dummytop;
+			if (singleton) {
+				css->cgroup = dummytop;
+			} else {
+				/* Is this safe? */
+				ss->destroy(ss, cgrp);
+			}
 			cgrp->subsys[i] = NULL;
 			root->subsys_bits &= ~bit;
-			rootnode.subsys_bits |= bit;
+			if (singleton)
+				rootnode.subsys_bits |= bit;
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
@@ -974,7 +1020,7 @@ static int parse_cgroupfs_options(char *data,
 			/* Add all non-disabled subsystems */
 			int i;
 			opts->subsys_bits = 0;
-			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {
 				struct cgroup_subsys *ss = subsys[i];
 				if (!ss->disabled)
 					opts->subsys_bits |= 1ul << i;
@@ -1040,6 +1086,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	struct cgroupfs_root *root = sb->s_fs_info;
 	struct cgroup *cgrp = &root->top_cgroup;
 	struct cgroup_sb_opts opts;
+	unsigned long original_bits;
 
 	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
 	mutex_lock(&cgroup_mutex);
@@ -1061,9 +1108,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 		goto out_unlock;
 	}
 
+	original_bits = root->subsys_bits;
 	ret = rebind_subsystems(root, opts.subsys_bits);
-	if (ret)
+	if (ret) {
+		int tmp = rebind_subsystems(root, original_bits);
+		BUG_ON(tmp);
 		goto out_unlock;
+	}
 
 	/* (re)populate subsystem files */
 	cgroup_populate_dir(cgrp);
@@ -1265,16 +1316,15 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		}
 
 		ret = rebind_subsystems(root, opts.subsys_bits);
-		if (ret == -EBUSY) {
+		if (ret) {
+			int tmp = rebind_subsystems(root, 0);
+			BUG_ON(tmp);
 			mutex_unlock(&cgroup_mutex);
 			mutex_unlock(&inode->i_mutex);
 			free_cg_links(&tmp_cg_links);
 			goto drop_new_super;
 		}
 
-		/* EBUSY should be the only error here */
-		BUG_ON(ret);
-
 		list_add(&root->root_list, &roots);
 		root_count++;
 
@@ -2591,20 +2641,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 	return 0;
 }
 
-static void init_cgroup_css(struct cgroup_subsys_state *css,
-			       struct cgroup_subsys *ss,
-			       struct cgroup *cgrp)
-{
-	css->cgroup = cgrp;
-	atomic_set(&css->refcnt, 1);
-	css->flags = 0;
-	css->id = NULL;
-	if (cgrp == dummytop)
-		set_bit(CSS_ROOT, &css->flags);
-	BUG_ON(cgrp->subsys[ss->subsys_id]);
-	cgrp->subsys[ss->subsys_id] = css;
-}
-
 static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 {
 	/* We need to take each hierarchy_mutex in a consistent order */
@@ -2890,21 +2926,23 @@ again:
 static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 {
 	struct cgroup_subsys_state *css;
-
+	bool singleton = ss->subsys_id < CGROUP_SINGLETON_SUBSYS_COUNT;
 	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
-	/* Create the top cgroup state for this subsystem */
-	css = ss->create(ss, dummytop);
-	/* We don't handle early failures gracefully */
-	BUG_ON(IS_ERR(css));
-	init_cgroup_css(css, ss, dummytop);
-
-	/* Update the init_css_set to contain a subsys
-	 * pointer to this state - since the subsystem is
-	 * newly registered, all tasks and hence the
-	 * init_css_set is in the subsystem's top cgroup. */
-	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+	if (singleton) {
+		/* Create the top cgroup state for this subsystem */
+		css = ss->create(ss, dummytop);
+		/* We don't handle early failures gracefully */
+		BUG_ON(IS_ERR(css));
+		init_cgroup_css(css, ss, dummytop);
 
+		/* Update the init_css_set to contain a subsys
+		 * pointer to this state - since the subsystem is
+		 * newly registered, all tasks and hence the
+		 * init_css_set is in the subsystem's top cgroup. */
+		init_css_set.subsys[ss->subsys_id] = css;
+		rootnode.subsys_bits |= 1ULL << ss->subsys_id;
+	}
 	need_forkexit_callback |= ss->fork || ss->exit;
 
 	/* At system boot, before all subsystems have been
@@ -2916,7 +2954,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 
-	rootnode.subsys_bits |= 1ULL << ss->subsys_id;
 }
 
 /**
@@ -3285,6 +3322,8 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 
 	/* We shouldn't be called by an unregistered subsystem */
 	BUG_ON(!subsys->active);
+	/* We can only support singleton subsystems */
+	BUG_ON(subsys->subsys_id >= CGROUP_SINGLETON_SUBSYS_COUNT);
 
 	/* First figure out what hierarchy and cgroup we're dealing
 	 * with, and pin them so we can drop cgroup_mutex */

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH 7/7] [RFC] Example multi-bindable subsystem: a per-cgroup notes field
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
                     ` (5 preceding siblings ...)
  2009-03-12 10:51   ` [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems Paul Menage
@ 2009-03-12 10:51   ` Paul Menage
       [not found]     ` <20090312105153.24154.29389.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
  2009-03-16  1:10   ` [PATCH 0/7][RFC] CGroup hierarchy extensions KAMEZAWA Hiroyuki
  7 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-12 10:51 UTC (permalink / raw)
  To: lizf-BthXqXjhjHXQFUHtdCDX3A,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A

[RFC] Example multi-bindable subsystem: a per-cgroup notes field

As an example of a multiply-bindable subsystem, this patch introduces
the "info" subsystem, which provides a single file, "info.notes", in
which user-space middleware can store an arbitrary (up to one page)
string representing configuration data about that cgroup. This reduces
the need to keep additional state outside the cgroup filesystem.

TODO: a single page is somewhat limiting - the file limit should be
userspace-configurable, and the system should store large notes in a
vector of pages (to avoid trying to kmalloc() an excessively large
chunk of memory. (Or maybe just use vmalloc())

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

---

 include/linux/cgroup_subsys.h |    6 +++
 init/Kconfig                  |    8 ++++
 kernel/Makefile               |    1 
 kernel/info_cgroup.c          |   93 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 0 deletions(-)
 create mode 100644 kernel/info_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index f78605e..5dfea38 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_INFO
+MULTI_SUBSYS(info)
+#endif
+
+/* */
diff --git a/init/Kconfig b/init/Kconfig
index fd5e3be..018601b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -604,6 +604,14 @@ config CGROUP_MEM_RES_CTLR_SWAP
 	  there will be no overhead from this. Even when you set this config=y,
 	  if boot option "noswapaccount" is set, swap will not be accounted.
 
+config CGROUP_INFO
+	bool "Simple application-specific info cgroup subsystem
+	help
+	  Provides a simple cgroups subsystem with an "info.notes"
+	  field, which can be used by middleware to store
+	  application-specific configuration data about a cgroup. Can
+	  be mounted on multiple hierarchies at once.
+
 endif # CGROUPS
 
 config MM_OWNER
diff --git a/kernel/Makefile b/kernel/Makefile
index bdc528f..782752f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
+obj-$(CONFIG_CGROUP_INFO) += info_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
diff --git a/kernel/info_cgroup.c b/kernel/info_cgroup.c
new file mode 100644
index 0000000..b8fdb37
--- /dev/null
+++ b/kernel/info_cgroup.c
@@ -0,0 +1,93 @@
+/*
+ * info_cgroup.c - simple cgroup providing a "notes" field
+ */
+
+#include "linux/cgroup.h"
+#include "linux/err.h"
+#include "linux/seq_file.h"
+
+#define MAX_NOTES_LEN PAGE_SIZE
+
+struct info_cgroup {
+	struct cgroup_subsys_state css;
+	const char *notes;
+	spinlock_t lock;
+};
+
+static inline struct info_cgroup *cg_info(struct cgroup *cg)
+{
+	return container_of(cgroup_subsys_state(cg, info_subsys_id),
+			    struct info_cgroup, css);
+}
+
+static struct cgroup_subsys_state *info_create(struct cgroup_subsys *ss,
+					       struct cgroup *cg)
+{
+	struct info_cgroup *info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&info->lock);
+	return &info->css;
+}
+
+static void info_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct info_cgroup *css = cg_info(cont);
+	kfree(css->notes);
+	kfree(css);
+}
+
+static int info_read(struct cgroup *cont,
+		     struct cftype *cft,
+		     struct seq_file *seq)
+{
+	struct info_cgroup *css = cg_info(cont);
+	spin_lock(&css->lock);
+	if (css->notes)
+		seq_puts(seq, css->notes);
+	spin_unlock(&css->lock);
+	return 0;
+}
+
+static int info_write(struct cgroup *cont,
+		      struct cftype *cft,
+		      const char *str) {
+	struct info_cgroup *css = cg_info(cont);
+	if (*str) {
+		if (strlen(str) > MAX_NOTES_LEN)
+			return -ENOSPC;
+		str = kstrdup(str, GFP_KERNEL);
+		if (!str)
+			return -ENOMEM;
+	} else {
+		str = NULL;
+	}
+
+	spin_lock(&css->lock);
+	kfree(css->notes);
+	css->notes = str;
+	spin_unlock(&css->lock);
+	return 0;
+}
+
+static struct cftype info_files[] =  {
+	{
+		.name = "notes",
+		.read_seq_string = info_read,
+		.write_string = info_write,
+	},
+};
+
+static int info_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, info_files,
+				ARRAY_SIZE(info_files));
+}
+
+struct cgroup_subsys info_subsys = {
+	.name = "info",
+	.create = info_create,
+	.destroy = info_destroy,
+	.populate = info_populate,
+	.subsys_id = info_subsys_id,
+};

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH 0/7][RFC] CGroup hierarchy extensions
       [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
                     ` (6 preceding siblings ...)
  2009-03-12 10:51   ` [PATCH 7/7] [RFC] Example multi-bindable subsystem: a per-cgroup notes field Paul Menage
@ 2009-03-16  1:10   ` KAMEZAWA Hiroyuki
  7 siblings, 0 replies; 28+ messages in thread
From: KAMEZAWA Hiroyuki @ 2009-03-16  1:10 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Thu, 12 Mar 2009 03:51:16 -0700
Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:

> The following series implements support for:
> 
> - named cgroup hierarchies
> - cgroup hierarchies with no bound subsystems
> - cgroup subsystems that can be bound to multiple hierarchies
> 
> These patches aren't yet very well tested (only brief testing running
> under UML so far) or well documented, but I wanted to get them sent
> out for comment before I went on vacation.
> 
Wow, exciting patches :)

Thank you !

-Kame

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/7] [RFC] Support named cgroups hierarchies
       [not found]     ` <20090312105122.24154.73633.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:44       ` Li Zefan
       [not found]         ` <49BF46BC.4080302-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:44 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> [RFC] Support named cgroups hierarchies
> 
> To simplify referring to cgroup hierarchies in mount statements, and
> to allow disambiguation in the presence of empty hierarchies and
> multiply-bindable subsystems (see later patches in series) this patch
> adds support for naming a new cgroup hierarchy via the "name=" mount
> option
> 
> A pre-existing hierarchy may be specified by either name or by
> subsystems; a hierarchy's name cannot be changed by a remount
> operation.
> 
> Example usage:
> 
> # To create a hierarchy called "foo" containing the "cpu" subsystem
> mount -t cgroup -oname=foo,cpu cgroup /mnt/cgroup1
> 
> # To mount the "foo" hierarchy on a second location
> mount -t cgroup -oname=foo cgroup /mnt/cgroup2
> 
> Open issues:
> 
> - should the specification be via a name= option as in this patch, or
>   should we simply use the "device name" as passed to the mount()
>   system call?  Using the device name is more conceptually clean and
>   consistent with the filesystem API; however, given that the device
>   name is currently ignored by cgroups, this would lead to a
>   user-visible behaviour change.
> 

If we use "device name" and don't allow it to be changed on remount,
this seems a change that may break/suprise existing users?

And another issue is, using "device name" won't allow us to use NULL
name, which is also user-visible in /proc/pid/cgroups/.

Some comments below.

> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 
> ---
> 
>  kernel/cgroup.c |  126 ++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 82 insertions(+), 44 deletions(-)
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 5995477..aa5edc8 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -92,6 +92,9 @@ struct cgroupfs_root {
>  
>  	/* The path to use for release notifications. */
>  	char release_agent_path[PATH_MAX];
> +
> +	/* The name for this hierarchy - may be empty */
> +	char name[PATH_MAX];

I think 32 or 64 is sufficient. How about reuse MAX_CGROUP_TYPE_NAMELEN
which is the length limit of cgroup_subsys.name?

>  };
>  
>  /*
> @@ -826,6 +829,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
>  		seq_puts(seq, ",noprefix");
>  	if (strlen(root->release_agent_path))
>  		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
> +	if (strlen(root->name))
> +		seq_printf(seq, ",name=%s", root->name);
>  	mutex_unlock(&cgroup_mutex);
>  	return 0;
>  }
> @@ -834,18 +839,22 @@ struct cgroup_sb_opts {
>  	unsigned long subsys_bits;
>  	unsigned long flags;
>  	char *release_agent;
> +	char *name;
> +	/* A flag indicating that a root was created from this options block */
> +	bool created_root;
>  };
>  
>  /* Convert a hierarchy specifier into a bitmask of subsystems and
>   * flags. */
>  static int parse_cgroupfs_options(char *data,
> -				     struct cgroup_sb_opts *opts)
> +				  struct cgroup_sb_opts *opts)
>  {
>  	char *token, *o = data ?: "all";
>  
>  	opts->subsys_bits = 0;
>  	opts->flags = 0;
>  	opts->release_agent = NULL;
> +	opts->name = NULL;
>  

memset() can save us some bytes.

>  	while ((token = strsep(&o, ",")) != NULL) {
>  		if (!*token)
> @@ -870,6 +879,15 @@ static int parse_cgroupfs_options(char *data,
>  				return -ENOMEM;
>  			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
>  			opts->release_agent[PATH_MAX - 1] = 0;
> +		} else if (!strncmp(token, "name=", 5)) {
> +			/* Specifying two names is forbidden */
> +			if (opts->name)
> +				return -EINVAL;
> +			opts->name = kzalloc(PATH_MAX, GFP_KERNEL);
> +			if (!opts->name)
> +				return -ENOMEM;
> +			strncpy(opts->name, token + 5, PATH_MAX - 1);
> +			opts->name[PATH_MAX - 1] = 0;

kstrndup()

>  		} else {
>  			struct cgroup_subsys *ss;
>  			int i;
> @@ -887,7 +905,7 @@ static int parse_cgroupfs_options(char *data,
>  	}
>  
>  	/* We can't have an empty hierarchy */
> -	if (!opts->subsys_bits)
> +	if (!opts->subsys_bits && !opts->name)
>  		return -EINVAL;
>  
>  	return 0;
> @@ -914,6 +932,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>  		goto out_unlock;
>  	}
>  
> +	/* Don't allow name to change at remount */
> +	if (opts.name && strcmp(opts.name, root->name)) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +
>  	ret = rebind_subsystems(root, opts.subsys_bits);
>  	if (ret)
>  		goto out_unlock;
> @@ -925,6 +949,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>  		strcpy(root->release_agent_path, opts.release_agent);
>   out_unlock:
>  	kfree(opts.release_agent);
> +	kfree(opts.name);
>  	mutex_unlock(&cgroup_mutex);
>  	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
>  	return ret;
> @@ -958,28 +983,56 @@ static void init_cgroup_root(struct cgroupfs_root *root)
>  
>  static int cgroup_test_super(struct super_block *sb, void *data)
>  {
> -	struct cgroupfs_root *new = data;
> +	struct cgroup_sb_opts *new = data;
>  	struct cgroupfs_root *root = sb->s_fs_info;
>  
> -	/* First check subsystems */
> -	if (new->subsys_bits != root->subsys_bits)
> -	    return 0;
> +	/* If we asked for a name then it must match */
> +	if (new->name && strcmp(new->name, root->name))
> +		return 0;
>  
> -	/* Next check flags */
> -	if (new->flags != root->flags)

Is this change intended or unintended? With this change we allow:
 # mount -t cgroup -o cpu xxx /mnt1
 # mount -t cgroup -o cpu,noprefix xxx /mnt2
But files in /mnt2 still prefix with 'cpu.'

> +	/* If we asked for subsystems then they must match */
> +	if (new->subsys_bits && new->subsys_bits != root->subsys_bits)
>  		return 0;

This has already been checked.

>  
>  	return 1;
>  }
>  
> +static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
> +{
> +	struct cgroupfs_root *root;
> +
> +	if (!opts->subsys_bits)
> +		return ERR_PTR(-EINVAL);
> +
> +	root = kzalloc(sizeof(*root), GFP_KERNEL);
> +	if (!root)
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_cgroup_root(root);
> +	root->subsys_bits = opts->subsys_bits;
> +	root->flags = opts->flags;
> +	if (opts->release_agent)
> +		strcpy(root->release_agent_path, opts->release_agent);
> +	if (opts->name)
> +		strcpy(root->name, opts->name);
> +	opts->created_root = true;
> +	return root;
> +}
> +
>  static int cgroup_set_super(struct super_block *sb, void *data)
>  {
>  	int ret;
> -	struct cgroupfs_root *root = data;
> +	struct cgroup_sb_opts *opts = data;
> +	struct cgroupfs_root *root;
>  
> +	root = cgroup_root_from_opts(opts);
> +	if (IS_ERR(root))
> +		return PTR_ERR(root);
>  	ret = set_anon_super(sb, NULL);
> -	if (ret)
> +	if (ret) {
> +		kfree(root);
>  		return ret;
> +	}
>  
>  	sb->s_fs_info = root;
>  	root->sb = sb;
> @@ -1018,47 +1071,26 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
>  			 int flags, const char *unused_dev_name,
>  			 void *data, struct vfsmount *mnt)
>  {
> -	struct cgroup_sb_opts opts;
> +	struct cgroup_sb_opts opts = { 0 };

with the memset() in parse_cgroupfs_options(), this init is unneeded.

>  	int ret = 0;
>  	struct super_block *sb;
> -	struct cgroupfs_root *root;
> -	struct list_head tmp_cg_links;
>  
>  	/* First find the desired set of subsystems */
>  	ret = parse_cgroupfs_options(data, &opts);
> -	if (ret) {
> -		kfree(opts.release_agent);
> -		return ret;
> -	}
> -
> -	root = kzalloc(sizeof(*root), GFP_KERNEL);
> -	if (!root) {
> -		kfree(opts.release_agent);
> -		return -ENOMEM;
> -	}
> -
> -	init_cgroup_root(root);
> -	root->subsys_bits = opts.subsys_bits;
> -	root->flags = opts.flags;
> -	if (opts.release_agent) {
> -		strcpy(root->release_agent_path, opts.release_agent);
> -		kfree(opts.release_agent);
> -	}

leaking opts.release_agent and opts.name with every successful mount.

> +	if (ret)
> +		goto out_err;
>  
> -	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
> +	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
>  
>  	if (IS_ERR(sb)) {
> -		kfree(root);
> -		return PTR_ERR(sb);
> +		ret = PTR_ERR(sb);
> +		goto out_err;
>  	}
>  
> -	if (sb->s_fs_info != root) {
> -		/* Reusing an existing superblock */
> -		BUG_ON(sb->s_root == NULL);
> -		kfree(root);
> -		root = NULL;
> -	} else {
> +	if (opts.created_root) {
>  		/* New superblock */
> +		struct cgroupfs_root *root = sb->s_fs_info;
> +		struct list_head tmp_cg_links;
>  		struct cgroup *root_cgrp = &root->top_cgroup;
>  		struct inode *inode;
>  		int i;
> @@ -1091,7 +1123,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
>  		if (ret == -EBUSY) {
>  			mutex_unlock(&cgroup_mutex);
>  			mutex_unlock(&inode->i_mutex);
> -			goto free_cg_links;
> +			free_cg_links(&tmp_cg_links);
> +			goto drop_new_super;
>  		}
>  
>  		/* EBUSY should be the only error here */
> @@ -1130,11 +1163,14 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
>  	simple_set_mnt(mnt, sb);
>  	return 0;
>  
> - free_cg_links:
> -	free_cg_links(&tmp_cg_links);
>   drop_new_super:
>  	up_write(&sb->s_umount);
>  	deactivate_super(sb);
> +
> + out_err:
> +	kfree(opts.release_agent);
> +	kfree(opts.name);
> +
>  	return ret;
>  }
>  
> @@ -2906,6 +2942,9 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  		seq_printf(m, "%lu:", root->subsys_bits);
>  		for_each_subsys(root, ss)
>  			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
> +		if (strlen(root->name))
> +			seq_printf(m, "%sname=%s",
> +				   count++ ? "," : "", root->name);

s/count++/count

>  		seq_putc(m, ':');
>  		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
>  		cgrp = task_cgroup(tsk, subsys_id);
> @@ -3606,4 +3645,3 @@ css_get_next(struct cgroup_subsys *ss, int id,
>  	}
>  	return ret;
>  }
> -
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 2/7] [RFC]Move the cgroup debug subsys into cgroup.c to access internal state
       [not found]     ` <20090312105127.24154.39200.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:44       ` Li Zefan
  0 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:44 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

> +static u64 current_css_set_refcount_read(struct cgroup *cont,

we can change those 'cont' to 'cgrp' along with this patch

> +					   struct cftype *cft)
> +{
> +	u64 count;
> +
> +	rcu_read_lock();
> +	count = atomic_read(&current->cgroups->refcount);
> +	rcu_read_unlock();
> +	return count;
> +}
> +
> +static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
> +{
> +	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
> +}
> +
> +static struct cftype debug_files[] =  {
> +	{
> +		.name = "cgroup_refcount",
> +		.read_u64 = cgroup_refcount_read,
> +	},

a trivial inconsistency that no blank line here

> +	{
> +		.name = "taskcount",
> +		.read_u64 = taskcount_read,
> +	},
> +
> +	{
> +		.name = "current_css_set",
> +		.read_u64 = current_css_set_read,
> +	},
> +
> +	{
> +		.name = "current_css_set_refcount",
> +		.read_u64 = current_css_set_refcount_read,
> +	},
> +
> +	{
> +		.name = "releasable",
> +		.read_u64 = releasable_read,
> +	},
> +};

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
       [not found]     ` <20090312105132.24154.99250.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:44       ` Li Zefan
  0 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:44 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
> 
> Currently the cgroups code makes the assumption that the subsystem
> pointers in a struct css_set uniquely identify the hierarchy->cgroup
> mappings associated with the css_set; and there's no way to directly
> identify the associated set of cgroups other than by indirecting
> through the appropriate subsystem state pointers.
> 
> This patch removes the need for that assumption by adding a
> back-pointer from struct cg_cgroup_link object to its associated
> cgroup; this allows the set of cgroups to be determined by traversing
> the cg_links list in the struct css_set.
> 
> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 
> ---
> 
>  kernel/cgroup.c |  218 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 files changed, 169 insertions(+), 49 deletions(-)
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index ecf00ad..9cdbbac 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -202,6 +202,7 @@ struct cg_cgroup_link {
>  	 * cgroup, anchored on cgroup->css_sets
>  	 */
>  	struct list_head cgrp_link_list;
> +	struct cgroup *cgrp;
>  	/*
>  	 * List running through cg_cgroup_links pointing at a
>  	 * single css_set object, anchored on css_set->cg_links
> @@ -228,8 +229,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
>  static DEFINE_RWLOCK(css_set_lock);
>  static int css_set_count;
>  
> -/* hash table for cgroup groups. This improves the performance to
> - * find an existing css_set */
> +/*
> + * hash table for cgroup groups. This improves the performance to find
> + * an existing css_set. This hash doesn't (currently) take into
> + * account cgroups in empty hierarchies.
> + */
>  #define CSS_SET_HASH_BITS	7
>  #define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
>  static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
> @@ -339,6 +343,77 @@ static inline void put_css_set_taskexit(struct css_set *cg)
>  }
>  
>  /*
> + * compare_css_sets - helper function for find_existing_css_set().
> + * @cg: candidate css_set being tested
> + * @old_cg: existing css_set for a task
> + * @new_cgrp: cgroup that's being entered by the task
> + * @template: desired set of css pointers in css_set (pre-calculated)
> + *
> + * Returns true if "cg" matches "old_cg" except for the hierarchy
> + * which "new_cgrp" belongs to, for which it should match "new_cgrp".
> + */
> +static bool compare_css_sets(struct css_set *cg,
> +			     struct css_set *old_cg,
> +			     struct cgroup *new_cgrp,
> +			     struct cgroup_subsys_state *template[])
> +{
> +	struct list_head *l1, *l2;

a blank line is needed. this comment can apply to some other places
in this patchset.

> +	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> +		/* Not all subsystems matched */
> +		return false;
> +	}
> +
> +	/*
> +	 * Compare cgroup pointers in order to distinguish between
> +	 * different cgroups in heirarchies with no subsystems. We
> +	 * could get by with just this check alone (and skip the
> +	 * memcmp above) but on most setups the memcmp check will
> +	 * avoid the need for this more expensive check on almost all
> +	 * candidates.
> +	 */
> +
> +	l1 = &cg->cg_links;
> +	l2 = &old_cg->cg_links;
> +	while (1) {
> +		struct cg_cgroup_link *cgl1, *cgl2;
> +		struct cgroup *cg1, *cg2;
> +
> +		l1 = l1->next;
> +		l2 = l2->next;
> +		/* See if we reached the end - both lists are equal length. */
> +		if (l1 == &cg->cg_links) {
> +			BUG_ON(l2 != &old_cg->cg_links);
> +			break;
> +		} else {
> +			BUG_ON(l2 == &old_cg->cg_links);
> +		}
> +		/* Locate the cgroups associated with these links. */
> +		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
> +		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
> +		cg1 = cgl1->cgrp;
> +		cg2 = cgl2->cgrp;
> +		/* Hierarchies should be linked in the same order. */
> +		BUG_ON(cg1->root != cg2->root);
> +
> +		/*
> +		 * If this hierarchy is the hierarchy of the cgroup
> +		 * that's changing, then we need to check that this
> +		 * css_set points to the new cgroup; if it's any other
> +		 * hierarchy, then this css_set should point to the
> +		 * same cgroup as the old css_set.
> +		 */
> +		if (cg1->root == new_cgrp->root) {
> +			if (cg1 != new_cgrp)
> +				return false;
> +		} else {
> +			if (cg1 != cg2)
> +				return false;
> +		}
> +	}
> +	return true;
> +}
> +
> +/*
>   * find_existing_css_set() is a helper for
>   * find_css_set(), and checks to see whether an existing
>   * css_set is suitable.
> @@ -379,10 +454,11 @@ static struct css_set *find_existing_css_set(
>  
>  	hhead = css_set_hash(template);
>  	hlist_for_each_entry(cg, node, hhead, hlist) {
> -		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> -			/* All subsystems matched */
> -			return cg;
> -		}
> +		if (!compare_css_sets(cg, oldcg, cgrp, template))
> +			continue;
> +
> +		/* This css_set matches what we need */
> +		return cg;
>  	}
>  
>  	/* No existing cgroup group matched */
> @@ -436,8 +512,13 @@ static void link_css_set(struct list_head *tmp_cg_links,
>  	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
>  				cgrp_link_list);
>  	link->cg = cg;
> +	link->cgrp = cgrp;
>  	list_move(&link->cgrp_link_list, &cgrp->css_sets);
> -	list_add(&link->cg_link_list, &cg->cg_links);
> +	/*
> +	 * Always add links to the tail of the list so that the list
> +	 * is sorted by order of hierarchy creation
> +	 */
> +	list_add_tail(&link->cg_link_list, &cg->cg_links);
>  }
>  
>  /*
> @@ -457,6 +538,7 @@ static struct css_set *find_css_set(
>  	struct list_head tmp_cg_links;
>  
>  	struct hlist_head *hhead;
> +	struct cg_cgroup_link *link, *saved_link;
>  
>  	/* First see if we already have a cgroup group that matches
>  	 * the desired set */
> @@ -492,18 +574,15 @@ static struct css_set *find_css_set(
>  	/* Add reference counts and links from the new css_set. */
>  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
>  		struct cgroup *cgrp = res->subsys[i]->cgroup;
> -		struct cgroup_subsys *ss = subsys[i];
>  		atomic_inc(&cgrp->count);
> -		/*
> -		 * We want to add a link once per cgroup, so we
> -		 * only do it for the first subsystem in each
> -		 * hierarchy
> -		 */
> -		if (ss->root->subsys_list.next == &ss->sibling)
> -			link_css_set(&tmp_cg_links, res, cgrp);
>  	}
> -	if (list_empty(&rootnode.subsys_list))
> -		link_css_set(&tmp_cg_links, res, dummytop);
> +	list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
> +				 cg_link_list) {

list_for_each_entry() is ok. this comment can also apply to some
other places in this patchset.

> +		struct cgroup *c = link->cgrp;
> +		if (c->root == cgrp->root)
> +			c = cgrp;
> +		link_css_set(&tmp_cg_links, res, c);
> +	}
>  
>  	BUG_ON(!list_empty(&tmp_cg_links));
>  
> @@ -519,6 +598,42 @@ static struct css_set *find_css_set(
>  }
>  
>  /*
> + * Return the cgroup for "task" from the given hierarchy. Must be
> + * called with cgroup_mutex held.
> + */
> +struct cgroup *task_cgroup_from_root(struct task_struct *task,
> +				     struct cgroupfs_root *root)

static

> +{
> +	struct css_set *css;
> +	struct cgroup *res = NULL;
> +
> +	BUG_ON(!mutex_is_locked(&cgroup_mutex));
> +	read_lock(&css_set_lock);
> +	/*
> +	 * No need to lock the task - since we hold cgroup_mutex the
> +	 * task can't change groups, so the only thing that can happen
> +	 * is that it exits and its css is set back to init_css_set.
> +	 */
> +	css = task->cgroups;
> +	if (css == &init_css_set) {
> +		res = &root->top_cgroup;
> +	} else {
> +		struct cg_cgroup_link *link, *saved_link;
> +		list_for_each_entry_safe(link, saved_link, &css->cg_links,
> +					 cg_link_list) {
> +			struct cgroup *c = link->cgrp;
> +			if (c->root == root) {
> +				res = c;
> +				break;
> +			}
> +		}
> +	}
> +	read_unlock(&css_set_lock);
> +	BUG_ON(!res);
> +	return res;
> +}
> +
> +/*
>   * There is one global cgroup mutex. We also require taking
>   * task_lock() when dereferencing a task's cgroup subsys pointers.
>   * See "The task_lock() exception", at the end of this comment.
> @@ -1281,27 +1396,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
>  	return 0;
>  }
>  
> -/*
> - * Return the first subsystem attached to a cgroup's hierarchy, and
> - * its subsystem id.
> - */
> -
> -static void get_first_subsys(const struct cgroup *cgrp,
> -			struct cgroup_subsys_state **css, int *subsys_id)
> -{
> -	const struct cgroupfs_root *root = cgrp->root;
> -	const struct cgroup_subsys *test_ss;
> -	BUG_ON(list_empty(&root->subsys_list));
> -	test_ss = list_entry(root->subsys_list.next,
> -			     struct cgroup_subsys, sibling);
> -	if (css) {
> -		*css = cgrp->subsys[test_ss->subsys_id];
> -		BUG_ON(!*css);
> -	}
> -	if (subsys_id)
> -		*subsys_id = test_ss->subsys_id;
> -}
> -
>  /**
>   * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
>   * @cgrp: the cgroup the task is attaching to
> @@ -1318,12 +1412,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
>  	struct css_set *cg;
>  	struct css_set *newcg;
>  	struct cgroupfs_root *root = cgrp->root;
> -	int subsys_id;
> -
> -	get_first_subsys(cgrp, NULL, &subsys_id);
>  
>  	/* Nothing to do if the task is already in that cgroup */
> -	oldcgrp = task_cgroup(tsk, subsys_id);
> +	oldcgrp = task_cgroup_from_root(tsk, root);
>  	if (cgrp == oldcgrp)
>  		return 0;
>  
> @@ -1881,7 +1972,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
>   * the start of a css_set
>   */
>  static void cgroup_advance_iter(struct cgroup *cgrp,
> -					  struct cgroup_iter *it)
> +				struct cgroup_iter *it)
>  {
>  	struct list_head *l = it->cg_link;
>  	struct cg_cgroup_link *link;
> @@ -2829,6 +2920,7 @@ int __init cgroup_init_early(void)
>  	init_task.cgroups = &init_css_set;
>  
>  	init_css_set_link.cg = &init_css_set;
> +	init_css_set_link.cgrp = dummytop;
>  	list_add(&init_css_set_link.cgrp_link_list,
>  		 &rootnode.top_cgroup.css_sets);
>  	list_add(&init_css_set_link.cg_link_list,
> @@ -2936,7 +3028,6 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  	for_each_active_root(root) {
>  		struct cgroup_subsys *ss;
>  		struct cgroup *cgrp;
> -		int subsys_id;
>  		int count = 0;
>  
>  		seq_printf(m, "%lu:", root->subsys_bits);
> @@ -2946,8 +3037,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  			seq_printf(m, "%sname=%s",
>  				   count++ ? "," : "", root->name);
>  		seq_putc(m, ':');
> -		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
> -		cgrp = task_cgroup(tsk, subsys_id);
> +		cgrp = task_cgroup_from_root(tsk, root);
>  		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
>  		if (retval < 0)
>  			goto out_unlock;
> @@ -3273,13 +3363,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
>  {
>  	int ret;
>  	struct cgroup *target;
> -	int subsys_id;
>  
>  	if (cgrp == dummytop)
>  		return 1;
>  
> -	get_first_subsys(cgrp, NULL, &subsys_id);
> -	target = task_cgroup(task, subsys_id);
> +	target = task_cgroup_from_root(task, cgrp->root);
>  	while (cgrp != target && cgrp!= cgrp->top_cgroup)
>  		cgrp = cgrp->parent;
>  	ret = (cgrp == target);
> @@ -3694,6 +3782,33 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
>  	return count;
>  }
>  
> +static int current_css_set_cg_links_read(struct cgroup *cont,
> +					 struct cftype *cft,
> +					 struct seq_file *seq)
> +{
> +	struct cg_cgroup_link *link, *saved_link;
> +	struct css_set *cg;
> +	write_lock_irq(&css_set_lock);
> +	task_lock(current);
> +	cg = current->cgroups;
> +	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
> +				 cg_link_list) {
> +		struct cgroup *c = link->cgrp;
> +		const char *name;
> +		rcu_read_lock();
> +		if (c->dentry)
> +			name = c->dentry->d_name.name;
> +		else
> +			name = "?";
> +		seq_printf(seq, "Root %lu group %s\n",
> +			   c->root->subsys_bits, name);
> +		rcu_read_unlock();
> +	}
> +	task_unlock(current);
> +	write_unlock_irq(&css_set_lock);
> +	return 0;
> +}
> +
>  static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
>  {
>  	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
> @@ -3720,6 +3835,11 @@ static struct cftype debug_files[] =  {
>  	},
>  
>  	{
> +		.name = "current_css_set_cg_links",
> +		.read_seq_string = current_css_set_cg_links_read,
> +	},
> +
> +	{
>  		.name = "releasable",
>  		.read_u64 = releasable_read,
>  	},
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems
       [not found]     ` <20090312105137.24154.34890.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:45       ` Li Zefan
       [not found]         ` <49BF470D.8080600-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:45 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> [RFC] Allow cgroup hierarchies to be created with no bound subsystems
> 
> This patch removes the restriction that a cgroup hierarchy must have
> at least one bound subsystem. The mount option "none" is treated as an
> explicit request for no bound subsystems.
> 
> A hierarchy with no subsystems can be useful for plan task tracking,

s/plan/plain/ ;)

> and is also a step towards the support for multiply-bindable
> subsystems in a later patch in this series.
> 
> As part of this change, the hierarchy id is no longer calculated from
> the bitmask of subsystems in the hierarchy (since this is not
> guaranteed to be unique) but is allocated via an ida. Reference counts
> on cgroups from css_set objects are now taken explicitly one per
> hierarchy, rather than one per subsystem.
> 
> Example usage:
> 
> mount -t cgroup -o none,name=foo cgroup /mnt/cgroup
> 
> Based on the "no-op"/"none" subsystem concept proposed by
> kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org
> 
> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 
> ---
> 
>  kernel/cgroup.c |  158 ++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 104 insertions(+), 54 deletions(-)
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 9cdbbac..4a7ef2c 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -72,6 +72,9 @@ struct cgroupfs_root {
>  	 */
>  	unsigned long subsys_bits;
>  
> +	/* Unique id for this hierarchy. */
> +	int hierarchy_id;
> +
>  	/* The bitmask of subsystems currently attached to this hierarchy */
>  	unsigned long actual_subsys_bits;
>  
> @@ -142,6 +145,10 @@ struct css_id {
>  static LIST_HEAD(roots);
>  static int root_count;
>  
> +static DEFINE_IDA(hierarchy_ida);
> +static int next_hierarchy_id;
> +static DEFINE_SPINLOCK(hierarchy_id_lock);
> +
>  /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
>  #define dummytop (&rootnode.top_cgroup)
>  
> @@ -259,42 +266,10 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
>   * compiled into their kernel but not actually in use */
>  static int use_task_css_set_links __read_mostly;
>  
> -/* When we create or destroy a css_set, the operation simply
> - * takes/releases a reference count on all the cgroups referenced
> - * by subsystems in this css_set. This can end up multiple-counting
> - * some cgroups, but that's OK - the ref-count is just a
> - * busy/not-busy indicator; ensuring that we only count each cgroup
> - * once would require taking a global lock to ensure that no
> - * subsystems moved between hierarchies while we were doing so.
> - *
> - * Possible TODO: decide at boot time based on the number of
> - * registered subsystems and the number of CPUs or NUMA nodes whether
> - * it's better for performance to ref-count every subsystem, or to
> - * take a global lock and only add one ref count to each hierarchy.
> - */
> -
> -/*
> - * unlink a css_set from the list and free it
> - */
> -static void unlink_css_set(struct css_set *cg)
> +static void __put_css_set(struct css_set *cg, int taskexit)
>  {
>  	struct cg_cgroup_link *link;
>  	struct cg_cgroup_link *saved_link;
> -
> -	hlist_del(&cg->hlist);
> -	css_set_count--;
> -
> -	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
> -				 cg_link_list) {
> -		list_del(&link->cg_link_list);
> -		list_del(&link->cgrp_link_list);
> -		kfree(link);
> -	}
> -}
> -
> -static void __put_css_set(struct css_set *cg, int taskexit)
> -{
> -	int i;
>  	/*
>  	 * Ensure that the refcount doesn't hit zero while any readers
>  	 * can see it. Similar to atomic_dec_and_lock(), but for an
> @@ -307,20 +282,27 @@ static void __put_css_set(struct css_set *cg, int taskexit)
>  		write_unlock(&css_set_lock);
>  		return;
>  	}
> -	unlink_css_set(cg);
> -	write_unlock(&css_set_lock);
>  
> -	rcu_read_lock();
> -	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> -		struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
> +	/* This css_set is dead. unlink it and release cgroup refcounts */
> +	hlist_del(&cg->hlist);
> +	css_set_count--;
> +
> +	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
> +				 cg_link_list) {
> +		struct cgroup *cgrp = link->cgrp;
> +		list_del(&link->cg_link_list);
> +		list_del(&link->cgrp_link_list);
>  		if (atomic_dec_and_test(&cgrp->count) &&
>  		    notify_on_release(cgrp)) {
>  			if (taskexit)
>  				set_bit(CGRP_RELEASABLE, &cgrp->flags);
>  			check_for_release(cgrp);
>  		}
> +
> +		kfree(link);
>  	}
> -	rcu_read_unlock();
> +
> +	write_unlock(&css_set_lock);
>  	kfree(cg);
>  }
>  
> @@ -513,6 +495,7 @@ static void link_css_set(struct list_head *tmp_cg_links,
>  				cgrp_link_list);
>  	link->cg = cg;
>  	link->cgrp = cgrp;
> +	atomic_inc(&cgrp->count);
>  	list_move(&link->cgrp_link_list, &cgrp->css_sets);
>  	/*
>  	 * Always add links to the tail of the list so that the list
> @@ -533,7 +516,6 @@ static struct css_set *find_css_set(
>  {
>  	struct css_set *res;
>  	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
> -	int i;
>  
>  	struct list_head tmp_cg_links;
>  
> @@ -572,10 +554,6 @@ static struct css_set *find_css_set(
>  
>  	write_lock(&css_set_lock);
>  	/* Add reference counts and links from the new css_set. */
> -	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> -		struct cgroup *cgrp = res->subsys[i]->cgroup;
> -		atomic_inc(&cgrp->count);
> -	}
>  	list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
>  				 cg_link_list) {
>  		struct cgroup *c = link->cgrp;
> @@ -955,6 +933,10 @@ struct cgroup_sb_opts {
>  	unsigned long flags;
>  	char *release_agent;
>  	char *name;
> +
> +	/* User explicitly requested empty subsystem */
> +	bool none;
> +
>  	/* A flag indicating that a root was created from this options block */
>  	bool created_root;
>  };
> @@ -983,6 +965,9 @@ static int parse_cgroupfs_options(char *data,
>  				if (!ss->disabled)
>  					opts->subsys_bits |= 1ul << i;
>  			}
> +		} else if (!strcmp(token, "none")) {
> +			/* Explicitly have no subsystems */
> +			opts->none = true;
>  		} else if (!strcmp(token, "noprefix")) {
>  			set_bit(ROOT_NOPREFIX, &opts->flags);
>  		} else if (!strncmp(token, "release_agent=", 14)) {
> @@ -1019,7 +1004,16 @@ static int parse_cgroupfs_options(char *data,
>  		}
>  	}
>  
> -	/* We can't have an empty hierarchy */
> +	/* Consistency checks */
> +
> +	/* Can't specify "none" and some subsystems */
> +	if (opts->subsys_bits && opts->none)
> +		return -EINVAL;
> +
> +	/*
> +	 * We either have to specify by name or by subsystems. (So all
> +	 * empty hierarchies must have a name).
> +	 */
>  	if (!opts->subsys_bits && !opts->name)
>  		return -EINVAL;
>  
> @@ -1085,6 +1079,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
>  	INIT_LIST_HEAD(&cgrp->release_list);
>  	init_rwsem(&cgrp->pids_mutex);
>  }
> +
>  static void init_cgroup_root(struct cgroupfs_root *root)
>  {
>  	struct cgroup *cgrp = &root->top_cgroup;
> @@ -1096,6 +1091,18 @@ static void init_cgroup_root(struct cgroupfs_root *root)
>  	init_cgroup_housekeeping(cgrp);
>  }
>  
> +static void init_root_id(struct cgroupfs_root *root)
> +{
> +	BUG_ON(!ida_pre_get(&hierarchy_ida, GFP_KERNEL));

we should return -errno, BUG_ON() on this is wrong

> +	spin_lock(&hierarchy_id_lock);
> +	if (ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
> +			      &root->hierarchy_id)) {
> +		BUG_ON(ida_get_new(&hierarchy_ida, &root->hierarchy_id));
> +	}

next_hierarchy_id is redundant, just use ida_get_new() instead of
ida_get_new_above()

and I think we should check EAGAIN:

/*
 * ida_get_new - allocate new ID
 ...
 * If memory is required, it will return -EAGAIN, you should unlock
 * and go back to the idr_pre_get() call. ...
 */

> +	next_hierarchy_id = root->hierarchy_id + 1;
> +	spin_unlock(&hierarchy_id_lock);
> +}
> +
>  static int cgroup_test_super(struct super_block *sb, void *data)
>  {
>  	struct cgroup_sb_opts *new = data;
> @@ -1116,7 +1123,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>  {
>  	struct cgroupfs_root *root;
>  
> -	if (!opts->subsys_bits)
> +	if (!opts->subsys_bits && !opts->none)
>  		return ERR_PTR(-EINVAL);
>  

Shouldn't we check this in parse_cgroupfs_options() instead?

>  	root = kzalloc(sizeof(*root), GFP_KERNEL);
> @@ -1124,6 +1131,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>  		return ERR_PTR(-ENOMEM);
>  
>  	init_cgroup_root(root);
> +	init_root_id(root);
> +
>  	root->subsys_bits = opts->subsys_bits;
>  	root->flags = opts->flags;
>  	if (opts->release_agent)
> @@ -1134,6 +1143,15 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>  	return root;
>  }
>  
> +static void cgroup_drop_root(struct cgroupfs_root *root)
> +{
> +	BUG_ON(!root->hierarchy_id);

I think this BUG_ON() is redundant, if root->hierarchy_id == 0,
we are freeing the dummy root, and kfree(root) should trigger
kernel oops.

> +	spin_lock(&hierarchy_id_lock);
> +	ida_remove(&hierarchy_ida, root->hierarchy_id);
> +	spin_unlock(&hierarchy_id_lock);
> +	kfree(root);
> +}
> +
>  static int cgroup_set_super(struct super_block *sb, void *data)
>  {
>  	int ret;
> @@ -1145,7 +1163,7 @@ static int cgroup_set_super(struct super_block *sb, void *data)
>  		return PTR_ERR(root);
>  	ret = set_anon_super(sb, NULL);
>  	if (ret) {
> -		kfree(root);
> +		cgroup_drop_root(root);
>  		return ret;
>  	}
>  
> @@ -1331,7 +1349,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
>  	mutex_unlock(&cgroup_mutex);
>  
>  	kill_litter_super(sb);
> -	kfree(root);
> +	cgroup_drop_root(root);
>  }
>  
>  static struct file_system_type cgroup_fs_type = {
> @@ -2975,7 +2993,7 @@ int __init cgroup_init(void)
>  	/* Add init_css_set to the hash table */
>  	hhead = css_set_hash(init_css_set.subsys);
>  	hlist_add_head(&init_css_set.hlist, hhead);
> -
> +	init_root_id(&rootnode);
>  	err = register_filesystem(&cgroup_fs_type);
>  	if (err < 0)
>  		goto out;
> @@ -3030,7 +3048,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  		struct cgroup *cgrp;
>  		int count = 0;
>  
> -		seq_printf(m, "%lu:", root->subsys_bits);
> +		seq_printf(m, "%d:", root->hierarchy_id);
>  		for_each_subsys(root, ss)
>  			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
>  		if (strlen(root->name))
> @@ -3076,8 +3094,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
>  	mutex_lock(&cgroup_mutex);
>  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
>  		struct cgroup_subsys *ss = subsys[i];
> -		seq_printf(m, "%s\t%lu\t%d\t%d\n",
> -			   ss->name, ss->root->subsys_bits,
> +		seq_printf(m, "%s\t%d\t%d\t%d\n",
> +			   ss->name, ss->root->hierarchy_id,
>  			   ss->root->number_of_cgroups, !ss->disabled);
>  	}
>  	mutex_unlock(&cgroup_mutex);
> @@ -3800,8 +3818,8 @@ static int current_css_set_cg_links_read(struct cgroup *cont,
>  			name = c->dentry->d_name.name;
>  		else
>  			name = "?";
> -		seq_printf(seq, "Root %lu group %s\n",
> -			   c->root->subsys_bits, name);
> +		seq_printf(seq, "Root %d group %s\n",
> +			   c->root->hierarchy_id, name);
>  		rcu_read_unlock();
>  	}
>  	task_unlock(current);
> @@ -3809,6 +3827,33 @@ static int current_css_set_cg_links_read(struct cgroup *cont,
>  	return 0;
>  }
>  
> +#define MAX_TASKS_SHOWN_PER_CSS 25
> +static int cgroup_css_links_read(struct cgroup *cont,
> +				 struct cftype *cft,
> +				 struct seq_file *seq)
> +{
> +	struct cg_cgroup_link *link, *saved_link;
> +	write_lock_irq(&css_set_lock);
> +	list_for_each_entry_safe(link, saved_link, &cont->css_sets,
> +				 cgrp_link_list) {
> +		struct css_set *cg = link->cg;
> +		struct task_struct *task, *saved_task;
> +		int count = 0;

> +		seq_printf(seq, "css_set %p\n", cg);
> +		list_for_each_entry_safe(task, saved_task, &cg->tasks,
> +					 cg_list) {
> +			if (count++ > MAX_TASKS_SHOWN_PER_CSS) {

actually this prints at most 26 tasks but not 25

and what's the concern to not print out all the tasks? the buffer
limit of seqfile?

> +				seq_puts(seq, "  ...\n");
> +				break;
> +			} else {
> +				seq_printf(seq, "  task %d\n", task->pid);

I guess we should call task_pid_vnr(tsk) instead of accessing task->pid
directly.

> +			}
> +		}
> +	}
> +	write_unlock_irq(&css_set_lock);
> +	return 0;
> +}
> +
>  static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
>  {
>  	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
> @@ -3840,6 +3885,11 @@ static struct cftype debug_files[] =  {
>  	},
>  
>  	{
> +		.name = "cgroup_css_links",
> +		.read_seq_string = cgroup_css_links_read,
> +	},
> +
> +	{
>  		.name = "releasable",
>  		.read_u64 = releasable_read,
>  	},
> 
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]     ` <20090312105147.24154.62638.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:46       ` Li Zefan
       [not found]         ` <49BF4744.5060309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:46 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> [RFC] Support multiply-bindable cgroup subsystems
> 
> This patch allows a cgroup subsystem to be marked as bindable on
> multiple cgroup hierarchies independently, when declared in
> cgroup_subsys.h via MULTI_SUBSYS() rather than SUBSYS().
> 
> The state for such subsystems cannot be accessed directly from a
> task->cgroups (since there's no unique mapping for a task) but instead
> must be accessed via a particular control group object.
> 
> Multiply-bound subsystems are useful in cases where there's no direct
> correspondence between the cgroup configuration and some property of
> the kernel outside of the cgroups subsystem.  So this would not be
> applicable to e.g. the CFS cgroup, since there has to a unique mapping
> from a task to its CFS run queue.
> 
> As an example, the "debug" subsystem is marked multiply-bindable,
> since it has no state outside the cgroups framework itself.
> 
> Example usage:
> 
> mount -t cgroup -o name=foo,debug,cpu cgroup /mnt1
> mount -t cgroup -o name=bar,debug,memory cgroup /mnt2
> 
> Open issues:
> 
> - what should /proc/cgroup show for multiply-bindable subsystems?
>   Currently it shows just one entry for each hierarchy the subsystem
>   is bound to, which means the subsystem doesn't show up when unbound,
>   and is indistinguishable from a regular (singleton) subsystem when
>   singly-bound.
> 
> - should the bind() callback be extended to support passing more
>   information to multiply-bindable subsystems? Or should we just scrap
>   the bind() callback since nothing appears to be using it.
> 

I agree to scrap it.

> - how to stop checkpatch.pl complaining about the way the
>   SUBSYS/MULTI_SUBSYS macros as used to define the cgroup_subsys_id
>   enum in cgroup.h?
> 

I think we can ignore it.

> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 
> ---
> 
>  include/linux/cgroup.h        |   37 ++++++++++-
>  include/linux/cgroup_subsys.h |    2 -
>  kernel/cgroup.c               |  137 ++++++++++++++++++++++++++---------------
>  3 files changed, 123 insertions(+), 53 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index adf6739..9b5999d 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -39,10 +39,37 @@ extern int cgroupstats_build(struct cgroupstats *stats,
>  
>  extern struct file_operations proc_cgroup_operations;
>  
> -/* Define the enumeration of all cgroup subsystems */
> -#define SUBSYS(_x) _x ## _subsys_id,
> +/* Define the enumeration of all cgroup subsystems. There are two

/*
 * xxx
 */

> + * kinds of subsystems:
> + *
> + * - regular (singleton) subsystems can be only mounted once; these
> + * generally correspond to some system that has substantial state
> + * outside of the cgroups framework, or where the state is being used
> + * to control some external behaviour (e.g. CPU scheduler).  Every
> + * task has an associated state pointer (accessed efficiently through
> + * task->cgroups) for each singleton subsystem.
> + *
> + * - multiply-bound subsystems may be mounted on zero or more
> + * hierarchies. Since there's no unique mapping from a task to a
> + * subsystem state pointer for multiply-bound subsystems, the state of
> + * these subsystems cannot be accessed rapidly from a task
> + * pointer. These correspond to subsystems where most or all of the
> + * state is maintained within the subsystem itself, and/or the
> + * subsystem is used for monitoring rather than control.
> + */
>  enum cgroup_subsys_id {
> +#define SUBSYS(_x) _x ## _subsys_id,
> +#define MULTI_SUBSYS(_x)
>  #include <linux/cgroup_subsys.h>
> +#undef SUBSYS
> +#undef MULTI_SUBSYS
> +	CGROUP_SINGLETON_SUBSYS_COUNT,
> +	CGROUP_DUMMY_ENUM_RESET = CGROUP_SINGLETON_SUBSYS_COUNT - 1,
> +#define SUBSYS(_x)
> +#define MULTI_SUBSYS(_x) _x ## _subsys_id,
> +#include <linux/cgroup_subsys.h>
> +#undef SUBSYS
> +#undef MULTI_SUBSYS
>  	CGROUP_SUBSYS_COUNT
>  };
>  #undef SUBSYS
> @@ -231,7 +258,7 @@ struct css_set {
>  	 * time). Multi-subsystems don't have an entry in here since
>  	 * there's no unique state for a given task.
>  	 */
> -	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
> +	struct cgroup_subsys_state *subsys[CGROUP_SINGLETON_SUBSYS_COUNT];
>  };
>  
>  /*
> @@ -419,8 +446,10 @@ struct cgroup_subsys {
>  };
>  
>  #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
> +#define MULTI_SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
>  #include <linux/cgroup_subsys.h>
>  #undef SUBSYS
> +#undef MULTI_SUBSYS
>  
>  static inline struct cgroup_subsys_state *cgroup_subsys_state(
>  	struct cgroup *cgrp, int subsys_id)
> @@ -431,6 +460,8 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
>  static inline struct cgroup_subsys_state *task_subsys_state(
>  	struct task_struct *task, int subsys_id)
>  {
> +	/* This check is optimized out for most callers */
> +	BUG_ON(subsys_id >= CGROUP_SINGLETON_SUBSYS_COUNT);
>  	return rcu_dereference(task->cgroups->subsys[subsys_id]);
>  }
>  
> diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
> index 9c8d31b..f78605e 100644
> --- a/include/linux/cgroup_subsys.h
> +++ b/include/linux/cgroup_subsys.h
> @@ -14,7 +14,7 @@ SUBSYS(cpuset)
>  /* */
>  
>  #ifdef CONFIG_CGROUP_DEBUG
> -SUBSYS(debug)
> +MULTI_SUBSYS(debug)
>  #endif
>  
>  /* */
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index fcb8181..942a951 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -52,10 +52,18 @@
>  static DEFINE_MUTEX(cgroup_mutex);
>  
>  /* Generate an array of cgroup subsystem pointers */
> -#define SUBSYS(_x) &_x ## _subsys,
>  
>  static struct cgroup_subsys *subsys[] = {
> +#define SUBSYS(_x) &_x ## _subsys,
> +#define MULTI_SUBSYS(_x)
> +#include <linux/cgroup_subsys.h>
> +#undef SUBSYS
> +#undef MULTI_SUBSYS
> +#define SUBSYS(_x)
> +#define MULTI_SUBSYS(_x) &_x ## _subsys,
>  #include <linux/cgroup_subsys.h>
> +#undef SUBSYS
> +#undef MULTI_SUBSYS
>  };
>  
>  /*
> @@ -265,7 +273,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
>  	int index;
>  	unsigned long tmp = 0UL;
>  
> -	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
> +	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++)
>  		tmp += (unsigned long)css[i];
>  	tmp = (tmp >> 16) ^ tmp;
>  
> @@ -435,7 +443,7 @@ static struct css_set *find_existing_css_set(
>  
>  	/* Build the set of subsystem state objects that we want to
>  	 * see in the new css_set */
> -	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> +	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {
>  		if (root->subsys_bits & (1UL << i)) {
>  			/* Subsystem is in this hierarchy. So we want
>  			 * the subsystem state from the new
> @@ -529,7 +537,7 @@ static struct css_set *find_css_set(
>  	struct css_set *oldcg, struct cgroup *cgrp)
>  {
>  	struct css_set *res;
> -	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
> +	struct cgroup_subsys_state *template[CGROUP_SINGLETON_SUBSYS_COUNT];
>  
>  	struct list_head tmp_cg_links;
>  
> @@ -853,6 +861,20 @@ static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
>  		wake_up_all(&cgroup_rmdir_waitq);
>  }
>  
> +static void init_cgroup_css(struct cgroup_subsys_state *css,
> +			       struct cgroup_subsys *ss,
> +			       struct cgroup *cgrp)
> +{
> +	css->cgroup = cgrp;
> +	atomic_set(&css->refcnt, 1);
> +	css->flags = 0;
> +	css->id = NULL;
> +	if (cgrp == &cgrp->root->top_cgroup)
> +		set_bit(CSS_ROOT, &css->flags);
> +	BUG_ON(cgrp->subsys[ss->subsys_id]);
> +	cgrp->subsys[ss->subsys_id] = css;
> +}
> +
>  static int rebind_subsystems(struct cgroupfs_root *root,
>  			     unsigned long final_bits)
>  {
> @@ -863,7 +885,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  	removed_bits = root->subsys_bits & ~final_bits;
>  	added_bits = final_bits & ~root->subsys_bits;
>  	/* Check that any added subsystems are currently free */
> -	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> +	for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {
>  		unsigned long bit = 1UL << i;
>  		if (!(bit & added_bits))
>  			continue;
> @@ -883,33 +905,57 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  	/* Process each subsystem */
>  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
>  		struct cgroup_subsys *ss = subsys[i];
> +		bool singleton = i < CGROUP_SINGLETON_SUBSYS_COUNT;
>  		unsigned long bit = 1UL << i;
>  		if (bit & added_bits) {
> +			struct cgroup_subsys_state *css;
>  			/* We're binding this subsystem to this hierarchy */
>  			BUG_ON(cgrp->subsys[i]);
> -			BUG_ON(!dummytop->subsys[i]);
> -			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
> -			BUG_ON(!(rootnode.subsys_bits & bit));
> +			if (singleton) {
> +				css = dummytop->subsys[i];
> +				BUG_ON(!css);
> +
> +				BUG_ON(css->cgroup != dummytop);
> +				BUG_ON(!(rootnode.subsys_bits & bit));
> +			} else {
> +				BUG_ON(dummytop->subsys[i]);
> +				BUG_ON(rootnode.subsys_bits & bit);
> +				css = ss->create(ss, cgrp);
> +				if (IS_ERR(css))
> +					return PTR_ERR(css);
> +				init_cgroup_css(css, ss, cgrp);
> +			}
>  			mutex_lock(&ss->hierarchy_mutex);
> -			cgrp->subsys[i] = dummytop->subsys[i];
> -			cgrp->subsys[i]->cgroup = cgrp;
> +			cgrp->subsys[i] = css;
> +			css->cgroup = cgrp;
>  			if (ss->bind)
>  				ss->bind(ss, cgrp);
> -			rootnode.subsys_bits &= ~bit;
> +			if (singleton)
> +				rootnode.subsys_bits &= ~bit;
>  			root->subsys_bits |= bit;
>  			mutex_unlock(&ss->hierarchy_mutex);
>  		} else if (bit & removed_bits) {
> +			struct cgroup_subsys_state *css;
>  			/* We're removing this subsystem */
> -			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
> -			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
> -			BUG_ON(rootnode.subsys_bits & bit);
> +			css = cgrp->subsys[i];
> +			BUG_ON(css->cgroup != cgrp);
> +			if (singleton) {
> +				BUG_ON(css != dummytop->subsys[i]);
> +				BUG_ON(rootnode.subsys_bits & bit);
> +			}
>  			mutex_lock(&ss->hierarchy_mutex);
>  			if (ss->bind)
>  				ss->bind(ss, dummytop);
> -			dummytop->subsys[i]->cgroup = dummytop;
> +			if (singleton) {
> +				css->cgroup = dummytop;
> +			} else {
> +				/* Is this safe? */
> +				ss->destroy(ss, cgrp);
> +			}
>  			cgrp->subsys[i] = NULL;
>  			root->subsys_bits &= ~bit;
> -			rootnode.subsys_bits |= bit;
> +			if (singleton)
> +				rootnode.subsys_bits |= bit;

initially we can see all subsystems in /proc/cgroups, but:

# mount -o cpu,debug xxx /mnt
# remount -o cpu xxx /mnt
# umount /mnt

now no root including rootnode contains debug_subsys, so we
won't see debug_subsys in /proc/cgroups.

I think this inconsistency is wrong.

>  			mutex_unlock(&ss->hierarchy_mutex);
>  		} else if (bit & final_bits) {
>  			/* Subsystem state should already exist */
> @@ -974,7 +1020,7 @@ static int parse_cgroupfs_options(char *data,
>  			/* Add all non-disabled subsystems */
>  			int i;
>  			opts->subsys_bits = 0;
> -			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> +			for (i = 0; i < CGROUP_SINGLETON_SUBSYS_COUNT; i++) {

I don't see why only singleton subsystems are binded when mount
with '-o all'.

>  				struct cgroup_subsys *ss = subsys[i];
>  				if (!ss->disabled)
>  					opts->subsys_bits |= 1ul << i;
> @@ -1040,6 +1086,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>  	struct cgroupfs_root *root = sb->s_fs_info;
>  	struct cgroup *cgrp = &root->top_cgroup;
>  	struct cgroup_sb_opts opts;
> +	unsigned long original_bits;
>  
>  	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
>  	mutex_lock(&cgroup_mutex);
> @@ -1061,9 +1108,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>  		goto out_unlock;
>  	}
>  
> +	original_bits = root->subsys_bits;
>  	ret = rebind_subsystems(root, opts.subsys_bits);
> -	if (ret)
> +	if (ret) {
> +		int tmp = rebind_subsystems(root, original_bits);
> +		BUG_ON(tmp);
>  		goto out_unlock;
> +	}
>  
>  	/* (re)populate subsystem files */
>  	cgroup_populate_dir(cgrp);
> @@ -1265,16 +1316,15 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
>  		}
>  
>  		ret = rebind_subsystems(root, opts.subsys_bits);
> -		if (ret == -EBUSY) {
> +		if (ret) {
> +			int tmp = rebind_subsystems(root, 0);
> +			BUG_ON(tmp);
>  			mutex_unlock(&cgroup_mutex);
>  			mutex_unlock(&inode->i_mutex);
>  			free_cg_links(&tmp_cg_links);
>  			goto drop_new_super;
>  		}
>  
> -		/* EBUSY should be the only error here */
> -		BUG_ON(ret);
> -
>  		list_add(&root->root_list, &roots);
>  		root_count++;
>  
> @@ -2591,20 +2641,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
>  	return 0;
>  }
>  
> -static void init_cgroup_css(struct cgroup_subsys_state *css,
> -			       struct cgroup_subsys *ss,
> -			       struct cgroup *cgrp)
> -{
> -	css->cgroup = cgrp;
> -	atomic_set(&css->refcnt, 1);
> -	css->flags = 0;
> -	css->id = NULL;
> -	if (cgrp == dummytop)
> -		set_bit(CSS_ROOT, &css->flags);
> -	BUG_ON(cgrp->subsys[ss->subsys_id]);
> -	cgrp->subsys[ss->subsys_id] = css;
> -}
> -
>  static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
>  {
>  	/* We need to take each hierarchy_mutex in a consistent order */
> @@ -2890,21 +2926,23 @@ again:
>  static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
>  {
>  	struct cgroup_subsys_state *css;
> -
> +	bool singleton = ss->subsys_id < CGROUP_SINGLETON_SUBSYS_COUNT;
>  	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
>  
> -	/* Create the top cgroup state for this subsystem */
> -	css = ss->create(ss, dummytop);
> -	/* We don't handle early failures gracefully */
> -	BUG_ON(IS_ERR(css));
> -	init_cgroup_css(css, ss, dummytop);
> -
> -	/* Update the init_css_set to contain a subsys
> -	 * pointer to this state - since the subsystem is
> -	 * newly registered, all tasks and hence the
> -	 * init_css_set is in the subsystem's top cgroup. */
> -	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
> +	if (singleton) {
> +		/* Create the top cgroup state for this subsystem */
> +		css = ss->create(ss, dummytop);
> +		/* We don't handle early failures gracefully */
> +		BUG_ON(IS_ERR(css));
> +		init_cgroup_css(css, ss, dummytop);
>  
> +		/* Update the init_css_set to contain a subsys
> +		 * pointer to this state - since the subsystem is
> +		 * newly registered, all tasks and hence the
> +		 * init_css_set is in the subsystem's top cgroup. */
> +		init_css_set.subsys[ss->subsys_id] = css;
> +		rootnode.subsys_bits |= 1ULL << ss->subsys_id;
> +	}
>  	need_forkexit_callback |= ss->fork || ss->exit;
>  
>  	/* At system boot, before all subsystems have been
> @@ -2916,7 +2954,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
>  	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
>  	ss->active = 1;
>  

tailing blank line

> -	rootnode.subsys_bits |= 1ULL << ss->subsys_id;
>  }
>  
>  /**
> @@ -3285,6 +3322,8 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
>  
>  	/* We shouldn't be called by an unregistered subsystem */
>  	BUG_ON(!subsys->active);
> +	/* We can only support singleton subsystems */
> +	BUG_ON(subsys->subsys_id >= CGROUP_SINGLETON_SUBSYS_COUNT);
>  
>  	/* First figure out what hierarchy and cgroup we're dealing
>  	 * with, and pin them so we can drop cgroup_mutex */
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 7/7] [RFC] Example multi-bindable subsystem: a per-cgroup notes field
       [not found]     ` <20090312105153.24154.29389.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2009-03-17  6:46       ` Li Zefan
  0 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2009-03-17  6:46 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> [RFC] Example multi-bindable subsystem: a per-cgroup notes field
> 
> As an example of a multiply-bindable subsystem, this patch introduces
> the "info" subsystem, which provides a single file, "info.notes", in
> which user-space middleware can store an arbitrary (up to one page)
> string representing configuration data about that cgroup. This reduces
> the need to keep additional state outside the cgroup filesystem.
> 
> TODO: a single page is somewhat limiting - the file limit should be
> userspace-configurable, and the system should store large notes in a
> vector of pages (to avoid trying to kmalloc() an excessively large
> chunk of memory. (Or maybe just use vmalloc())
> 

add info.size, and the size will default to PAGESIZE but is changable?

> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 

> +config CGROUP_INFO
> +	bool "Simple application-specific info cgroup subsystem

"..."

> +	help
> +	  Provides a simple cgroups subsystem with an "info.notes"
> +	  field, which can be used by middleware to store
> +	  application-specific configuration data about a cgroup. Can
> +	  be mounted on multiple hierarchies at once.
> +

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]         ` <49BF4744.5060309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-18  2:09           ` Li Zefan
       [not found]             ` <49C057EB.1000307-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-18  2:09 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

>>  			cgrp->subsys[i] = NULL;
>>  			root->subsys_bits &= ~bit;
>> -			rootnode.subsys_bits |= bit;
>> +			if (singleton)
>> +				rootnode.subsys_bits |= bit;
> 
> initially we can see all subsystems in /proc/cgroups, but:
> 

Ah, I just notice debug_subsys will never be in rootnode. But this is a bit odd,
and one of the use of /proc/cgroups is to check if a subsystem is supported or
not, so we can no longer do this:
	#! /bin/sh
	grep -q debug /proc/cgroups
	if [ $? -ne 0 ]; then
		echo "debug subsystem is not compiled into kernel"
		exit 1
	fi
	# mount debug_subsys and do something with it...

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]             ` <49C057EB.1000307-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-18  2:16               ` Paul Menage
       [not found]                 ` <6599ad830903171916x7364ec7cw76975d71d5125d82-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-18  2:16 UTC (permalink / raw)
  To: Li Zefan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Tue, Mar 17, 2009 at 7:09 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>
> Ah, I just notice debug_subsys will never be in rootnode.

Right - this is the first of the "Open Issues" that I mentioned in the
patch. The question is how to display the multiply-bindable subsystems
in /proc/cgroup in a way that won't break backward-compatibility.
Options include:

- always display one line for each subsystem; if a subsystem is
multiply-bindable then the "hierarchy id" and "num cgroups" columns
may be empty or have multiple (comma or slash-separated?) values

- for a multiply-bindable subsystem, have a header line to indicate
that the subsystem exists, and then a separate line for each bound
instance of the subsystem.

Either of these could confuse existing userspace parsers, but I'm not
sure that any exist other than libcgroup, and I don't think it's in
wide use yet.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                 ` <6599ad830903171916x7364ec7cw76975d71d5125d82-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-03-18  2:39                   ` Li Zefan
       [not found]                     ` <49C05EDF.5010607-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-18  2:39 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> On Tue, Mar 17, 2009 at 7:09 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>> Ah, I just notice debug_subsys will never be in rootnode.
> 
> Right - this is the first of the "Open Issues" that I mentioned in the
> patch. The question is how to display the multiply-bindable subsystems
> in /proc/cgroup in a way that won't break backward-compatibility.
> Options include:
> 
> - always display one line for each subsystem; if a subsystem is
> multiply-bindable then the "hierarchy id" and "num cgroups" columns
> may be empty or have multiple (comma or slash-separated?) values
> 

Then "hierarchy id" is no long a single number..

> - for a multiply-bindable subsystem, have a header line to indicate
> that the subsystem exists, and then a separate line for each bound
> instance of the subsystem.
> 

I think it's better to show every subsystems including debug_subsys in
/proc/cgroups, and show exactly n lines of debug_susys if we have n
hierarcies with debug_subsys binded, but no header line.

> Either of these could confuse existing userspace parsers, but I'm not

And some cgroup test programs, like controller tests in ltp and some
test programs that I wrote, but they are ok for this change.

> sure that any exist other than libcgroup, and I don't think it's in
> wide use yet.
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                     ` <49C05EDF.5010607-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-18  2:43                       ` Paul Menage
       [not found]                         ` <6599ad830903171943x7884cb03w8f22fa1629d667b3-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-18  2:43 UTC (permalink / raw)
  To: Li Zefan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Tue, Mar 17, 2009 at 7:39 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>
>> - always display one line for each subsystem; if a subsystem is
>> multiply-bindable then the "hierarchy id" and "num cgroups" columns
>> may be empty or have multiple (comma or slash-separated?) values
>>
>
> Then "hierarchy id" is no long a single number..

Correct.

>
>> - for a multiply-bindable subsystem, have a header line to indicate
>> that the subsystem exists, and then a separate line for each bound
>> instance of the subsystem.
>>
>
> I think it's better to show every subsystems including debug_subsys in
> /proc/cgroups, and show exactly n lines of debug_susys if we have n
> hierarcies with debug_subsys binded, but no header line.

But that gives a contradiction when n == 0 - we can't show exactly 0
lines for an unbound multi subsys, and still show every subsystem.

Does libcgroup actually parse /proc/cgroup? If not, maybe we should
just break the format now and replace it with something more
extensible for future changes.

> And some cgroup test programs, like controller tests in ltp and some
> test programs that I wrote, but they are ok for this change.

Right, changes to test programs aren't a compatibility issue.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                         ` <6599ad830903171943x7884cb03w8f22fa1629d667b3-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-03-18  3:09                           ` Li Zefan
       [not found]                             ` <49C065E7.8060901-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-03-18  3:09 UTC (permalink / raw)
  To: Paul Menage
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Balbir Singh

CC: Balbir Singh

Paul Menage wrote:
> On Tue, Mar 17, 2009 at 7:39 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>> - always display one line for each subsystem; if a subsystem is
>>> multiply-bindable then the "hierarchy id" and "num cgroups" columns
>>> may be empty or have multiple (comma or slash-separated?) values
>>>
>> Then "hierarchy id" is no long a single number..
> 
> Correct.
> 
>>> - for a multiply-bindable subsystem, have a header line to indicate
>>> that the subsystem exists, and then a separate line for each bound
>>> instance of the subsystem.
>>>
>> I think it's better to show every subsystems including debug_subsys in
>> /proc/cgroups, and show exactly n lines of debug_susys if we have n
>> hierarcies with debug_subsys binded, but no header line.
> 
> But that gives a contradiction when n == 0 - we can't show exactly 0
> lines for an unbound multi subsys, and still show every subsystem.
> 

I don't see what's wrong with this behavior:

multi subsys sits in rootnode if it's unbound, and is removed from
rootnode if it's binded at least in one hierarchy.

> Does libcgroup actually parse /proc/cgroup? If not, maybe we should

Balbir may answer this. :)

> just break the format now and replace it with something more
> extensible for future changes.
> 

Even /proc/cgroup is ok, I don't think we can break this based on assumption
that on one is making use of /proc/cgroups. :(

>> And some cgroup test programs, like controller tests in ltp and some
>> test programs that I wrote, but they are ok for this change.
> 
> Right, changes to test programs aren't a compatibility issue.
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                             ` <49C065E7.8060901-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-18  5:43                               ` Balbir Singh
  2009-03-31  9:02                               ` Paul Menage
  1 sibling, 0 replies; 28+ messages in thread
From: Balbir Singh @ 2009-03-18  5:43 UTC (permalink / raw)
  To: Li Zefan
  Cc: Paul Menage, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

* Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> [2009-03-18 11:09:27]:

> CC: Balbir Singh
> 
> Paul Menage wrote:
> > On Tue, Mar 17, 2009 at 7:39 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
> >>> - always display one line for each subsystem; if a subsystem is
> >>> multiply-bindable then the "hierarchy id" and "num cgroups" columns
> >>> may be empty or have multiple (comma or slash-separated?) values
> >>>
> >> Then "hierarchy id" is no long a single number..
> > 
> > Correct.
> > 
> >>> - for a multiply-bindable subsystem, have a header line to indicate
> >>> that the subsystem exists, and then a separate line for each bound
> >>> instance of the subsystem.
> >>>
> >> I think it's better to show every subsystems including debug_subsys in
> >> /proc/cgroups, and show exactly n lines of debug_susys if we have n
> >> hierarcies with debug_subsys binded, but no header line.
> > 
> > But that gives a contradiction when n == 0 - we can't show exactly 0
> > lines for an unbound multi subsys, and still show every subsystem.
> > 
> 
> I don't see what's wrong with this behavior:
> 
> multi subsys sits in rootnode if it's unbound, and is removed from
> rootnode if it's binded at least in one hierarchy.
> 
> > Does libcgroup actually parse /proc/cgroup? If not, maybe we should
> 
> Balbir may answer this. :)

We do parse /proc/cgroups to see what controllers/subsystems we have
and if they are enabled.

> 
> > just break the format now and replace it with something more
> > extensible for future changes.
> > 
> 
> Even /proc/cgroup is ok, I don't think we can break this based on assumption
> that on one is making use of /proc/cgroups. :(
>

If fields are not removed I don't see an issue. I would however
recommend version of /proc/cgroups, like we do for /proc/slabinfo in
the future. Versioning does not allow us to remove fields, but
addition can be handled in that manner. 

-- 
	Balbir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/7] [RFC] Support named cgroups hierarchies
       [not found]         ` <49BF46BC.4080302-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-31  8:12           ` Paul Menage
       [not found]             ` <6599ad830903310112x626252c4je760a80eb1eaa1d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-31  8:12 UTC (permalink / raw)
  To: Li Zefan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Mon, Mar 16, 2009 at 11:44 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>
>> - should the specification be via a name= option as in this patch, or
>>   should we simply use the "device name" as passed to the mount()
>>   system call?  Using the device name is more conceptually clean and
>>   consistent with the filesystem API; however, given that the device
>>   name is currently ignored by cgroups, this would lead to a
>>   user-visible behaviour change.
>>
>
> If we use "device name" and don't allow it to be changed on remount,
> this seems a change that may break/suprise existing users?

Potentially, yes.

>
> And another issue is, using "device name" won't allow us to use NULL
> name, which is also user-visible in /proc/pid/cgroups/.

Hmm. We could treat "none" specially, but I guess that's a bit ugly.

>> +
>> +     /* The name for this hierarchy - may be empty */
>> +     char name[PATH_MAX];
>
> I think 32 or 64 is sufficient. How about reuse MAX_CGROUP_TYPE_NAMELEN
> which is the length limit of cgroup_subsys.name?

I've added a new define MAX_CGROUP_ROOT_NAMELEN, value 64
>> +             } else if (!strncmp(token, "name=", 5)) {
>> +                     /* Specifying two names is forbidden */
>> +                     if (opts->name)
>> +                             return -EINVAL;
>> +                     opts->name = kzalloc(PATH_MAX, GFP_KERNEL);
>> +                     if (!opts->name)
>> +                             return -ENOMEM;
>> +                     strncpy(opts->name, token + 5, PATH_MAX - 1);
>> +                     opts->name[PATH_MAX - 1] = 0;
>
> kstrndup()

Good idea.

>>
>> -     /* Next check flags */
>> -     if (new->flags != root->flags)
>
> Is this change intended or unintended? With this change we allow:
>  # mount -t cgroup -o cpu xxx /mnt1
>  # mount -t cgroup -o cpu,noprefix xxx /mnt2
> But files in /mnt2 still prefix with 'cpu.'

That fits in better with existing semantics for other filesystems - if
you mount an already-mounted device with different superblock mount
options, the new mount still has the superblock options associated
with the old mount.

Arguably this might mean that we should ignore all options when
mounting a hierarchy by name that already exists, even if you specify
a different set of subsystems from what is mounted.

>
>> +     /* If we asked for subsystems then they must match */
>> +     if (new->subsys_bits && new->subsys_bits != root->subsys_bits)
>>               return 0;
>
> This has already been checked.

Where?
>> -     init_cgroup_root(root);
>> -     root->subsys_bits = opts.subsys_bits;
>> -     root->flags = opts.flags;
>> -     if (opts.release_agent) {
>> -             strcpy(root->release_agent_path, opts.release_agent);
>> -             kfree(opts.release_agent);
>> -     }
>
> leaking opts.release_agent and opts.name with every successful mount.

Good point, fixed.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems
       [not found]         ` <49BF470D.8080600-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-03-31  8:45           ` Paul Menage
       [not found]             ` <6599ad830903310145l7b24ad0vb4462f94390ac264-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-31  8:45 UTC (permalink / raw)
  To: Li Zefan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Mon, Mar 16, 2009 at 11:45 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>
>> +static void init_root_id(struct cgroupfs_root *root)
>> +{
>> +     BUG_ON(!ida_pre_get(&hierarchy_ida, GFP_KERNEL));
>
> we should return -errno, BUG_ON() on this is wrong

Hmm. Fair enough in the new root case, since it's possible we'd not
get the memory then - I've changed init_root_id() to return false on
failure. But I think that in cgroup_init(), we need to
BUG_ON(!init_root_id()) - we shouldn't be able to fail that early in
boot and there's no sensible way to handle it if it happens.

>
>> +     spin_lock(&hierarchy_id_lock);
>> +     if (ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
>> +                           &root->hierarchy_id)) {
>> +             BUG_ON(ida_get_new(&hierarchy_ida, &root->hierarchy_id));
>> +     }
>
> next_hierarchy_id is redundant, just use ida_get_new() instead of
> ida_get_new_above()

From the idr.c code it looks as though ida_get_new() always returns
the smallest unused ID. I want to cycle through increasing IDs until
we hit the end of the range, and then start again at 0.

>
> and I think we should check EAGAIN:

OK, I've changed this to handle EAGAIN. I don't see any value in
handling ENOSPC though - we're not going to fill up a 31-bit IDR.

>>
>> -     if (!opts->subsys_bits)
>> +     if (!opts->subsys_bits && !opts->none)
>>               return ERR_PTR(-EINVAL);
>>
>
> Shouldn't we check this in parse_cgroupfs_options() instead?

No, becase at that point it's valid - we can request the root named X
without specifying what subsystems X is already mounted with. But if
we don't find a root named X, then we're creating a new root, and we
have to have specified a set of subsystems, or else explicitly "none".

>>
>> +static void cgroup_drop_root(struct cgroupfs_root *root)
>> +{
>> +     BUG_ON(!root->hierarchy_id);
>
> I think this BUG_ON() is redundant, if root->hierarchy_id == 0,
> we are freeing the dummy root, and kfree(root) should trigger
> kernel oops.

It's more obvious this way.

>> +             seq_printf(seq, "css_set %p\n", cg);
>> +             list_for_each_entry_safe(task, saved_task, &cg->tasks,
>> +                                      cg_list) {
>> +                     if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
>
> actually this prints at most 26 tasks but not 25

Not really a big deal :-)

>
> and what's the concern to not print out all the tasks? the buffer
> limit of seqfile?

Yes.

>
>> +                             seq_puts(seq, "  ...\n");
>> +                             break;
>> +                     } else {
>> +                             seq_printf(seq, "  task %d\n", task->pid);
>
> I guess we should call task_pid_vnr(tsk) instead of accessing task->pid
> directly.

Changed.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                             ` <49C065E7.8060901-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  2009-03-18  5:43                               ` Balbir Singh
@ 2009-03-31  9:02                               ` Paul Menage
       [not found]                                 ` <6599ad830903310202p1c268237lff283b2676f78864-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 28+ messages in thread
From: Paul Menage @ 2009-03-31  9:02 UTC (permalink / raw)
  To: Li Zefan
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Balbir Singh

On Tue, Mar 17, 2009 at 8:09 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>
> I don't see what's wrong with this behavior:
>
> multi subsys sits in rootnode if it's unbound, and is removed from
> rootnode if it's binded at least in one hierarchy.

You mean just for the purposes of /proc/cgroup, or in reality?

Singleton (traditional) subsystems generally have some meaning outside
of the cgroups framework, e.g. the "cpu" subsystem corresponds to CFS
scheduler nodes for tasks; "cpuset" corresponds to the permitted
cpus/mems for a task. For every task there's a single unique state
object for each singleton subsystem.

But multi-bindable subsystems don't really have any meaning outside
the cgroup framework, since there's no unique mapping from a task to
its subsystem state. So instantiating a root cgroup object for that
subsystem in the unbound hierarchy is a bit pointless - it can't
really do anything. So it wouldn't really make sense to keep one
instance of a multi-bindable subsystem attached to rootnote until the
first bind for that subsystem, and then create fresh ones on the fly
later if the subsystem is bound to more hierarchies. In particular,
which one would you return to the rootnode later?

But I guess we could just pretend in /proc/cgroup, and add a new
column such as "multi-bindable".

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/7] [RFC] Support named cgroups hierarchies
       [not found]             ` <6599ad830903310112x626252c4je760a80eb1eaa1d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-04-01  6:24               ` Li Zefan
  0 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2009-04-01  6:24 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

>>> -     /* Next check flags */
>>> -     if (new->flags != root->flags)
>> Is this change intended or unintended? With this change we allow:
>>  # mount -t cgroup -o cpu xxx /mnt1
>>  # mount -t cgroup -o cpu,noprefix xxx /mnt2
>> But files in /mnt2 still prefix with 'cpu.'
> 
> That fits in better with existing semantics for other filesystems - if
> you mount an already-mounted device with different superblock mount
> options, the new mount still has the superblock options associated
> with the old mount.
> 
> Arguably this might mean that we should ignore all options when
> mounting a hierarchy by name that already exists, even if you specify
> a different set of subsystems from what is mounted.
> 

Then I think it's better to make it a different patch, but not hiding
this change in this patch.

>>> +     /* If we asked for subsystems then they must match */
>>> +     if (new->subsys_bits && new->subsys_bits != root->subsys_bits)
>>>               return 0;
>> This has already been checked.
> 
> Where?

My mistake, please ignore. :)

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no  bound subsystems
       [not found]             ` <6599ad830903310145l7b24ad0vb4462f94390ac264-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-04-01  6:40               ` Li Zefan
       [not found]                 ` <49D30C44.8050802-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2009-04-01  6:40 UTC (permalink / raw)
  To: Paul Menage; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Paul Menage wrote:
> On Mon, Mar 16, 2009 at 11:45 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>> +static void init_root_id(struct cgroupfs_root *root)
>>> +{
>>> +     BUG_ON(!ida_pre_get(&hierarchy_ida, GFP_KERNEL));
>> we should return -errno, BUG_ON() on this is wrong
> 
> Hmm. Fair enough in the new root case, since it's possible we'd not
> get the memory then - I've changed init_root_id() to return false on
> failure. But I think that in cgroup_init(), we need to
> BUG_ON(!init_root_id()) - we shouldn't be able to fail that early in
> boot and there's no sensible way to handle it if it happens.
> 

agreed

>>> +     spin_lock(&hierarchy_id_lock);
>>> +     if (ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
>>> +                           &root->hierarchy_id)) {
>>> +             BUG_ON(ida_get_new(&hierarchy_ida, &root->hierarchy_id));
>>> +     }
>> next_hierarchy_id is redundant, just use ida_get_new() instead of
>> ida_get_new_above()
> 
>>From the idr.c code it looks as though ida_get_new() always returns
> the smallest unused ID. I want to cycle through increasing IDs until
> we hit the end of the range, and then start again at 0.
> 

Makes sense, but it would be better to add a comment.

>> and I think we should check EAGAIN:
> 
> OK, I've changed this to handle EAGAIN. I don't see any value in
> handling ENOSPC though - we're not going to fill up a 31-bit IDR.
> 
>>> -     if (!opts->subsys_bits)
>>> +     if (!opts->subsys_bits && !opts->none)
>>>               return ERR_PTR(-EINVAL);
>>>
>> Shouldn't we check this in parse_cgroupfs_options() instead?
> 
> No, becase at that point it's valid - we can request the root named X
> without specifying what subsystems X is already mounted with. But if
> we don't find a root named X, then we're creating a new root, and we
> have to have specified a set of subsystems, or else explicitly "none".
> 

I see your poing. Again, better to have a comment in the code.

>> and what's the concern to not print out all the tasks? the buffer
>> limit of seqfile?
> 
> Yes.
> 

Hmm, I just had a try, and I found we don't need to worry about this,
seqfile can handle output that larger than PAGE_SIZE well.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems
       [not found]                                 ` <6599ad830903310202p1c268237lff283b2676f78864-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-04-01  6:44                                   ` Li Zefan
  0 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2009-04-01  6:44 UTC (permalink / raw)
  To: Paul Menage
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Balbir Singh

Paul Menage wrote:
> On Tue, Mar 17, 2009 at 8:09 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>> I don't see what's wrong with this behavior:
>>
>> multi subsys sits in rootnode if it's unbound, and is removed from
>> rootnode if it's binded at least in one hierarchy.
> 
> You mean just for the purposes of /proc/cgroup, or in reality?
> 

Yes, to show all subsystems in /proc/cgroup.

> Singleton (traditional) subsystems generally have some meaning outside
> of the cgroups framework, e.g. the "cpu" subsystem corresponds to CFS
> scheduler nodes for tasks; "cpuset" corresponds to the permitted
> cpus/mems for a task. For every task there's a single unique state
> object for each singleton subsystem.
> 
> But multi-bindable subsystems don't really have any meaning outside
> the cgroup framework, since there's no unique mapping from a task to
> its subsystem state. So instantiating a root cgroup object for that
> subsystem in the unbound hierarchy is a bit pointless - it can't
> really do anything. So it wouldn't really make sense to keep one
> instance of a multi-bindable subsystem attached to rootnote until the
> first bind for that subsystem, and then create fresh ones on the fly
> later if the subsystem is bound to more hierarchies. In particular,
> which one would you return to the rootnode later?
> 

For that purpose I suggest, we are not going to allocate a root cgroup
object for multi-subsys, but we just set the subsys-id in dump-root's
subsystem bits.

> But I guess we could just pretend in /proc/cgroup, and add a new
> column such as "multi-bindable".
> 
> Paul
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems
       [not found]                 ` <49D30C44.8050802-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2009-04-02  8:17                   ` Paul Menage
  0 siblings, 0 replies; 28+ messages in thread
From: Paul Menage @ 2009-04-02  8:17 UTC (permalink / raw)
  To: Li Zefan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Tue, Mar 31, 2009 at 11:40 PM, Li Zefan <lizf-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>> and what's the concern to not print out all the tasks? the buffer
>>> limit of seqfile?
>>
>> Yes.
>>
>
> Hmm, I just had a try, and I found we don't need to worry about this,
> seqfile can handle output that larger than PAGE_SIZE well.
>

But only as a kmalloc, so it gets progressively harder to satisfy as
the record size increases.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2009-04-02  8:17 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-12 10:51 [PATCH 0/7][RFC] CGroup hierarchy extensions Paul Menage
     [not found] ` <20090312104507.24154.71691.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-12 10:51   ` [PATCH 1/7] [RFC] Support named cgroups hierarchies Paul Menage
     [not found]     ` <20090312105122.24154.73633.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:44       ` Li Zefan
     [not found]         ` <49BF46BC.4080302-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-31  8:12           ` Paul Menage
     [not found]             ` <6599ad830903310112x626252c4je760a80eb1eaa1d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-01  6:24               ` Li Zefan
2009-03-12 10:51   ` [PATCH 2/7] [RFC]Move the cgroup debug subsys into cgroup.c to access internal state Paul Menage
     [not found]     ` <20090312105127.24154.39200.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:44       ` Li Zefan
2009-03-12 10:51   ` [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup Paul Menage
     [not found]     ` <20090312105132.24154.99250.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:44       ` Li Zefan
2009-03-12 10:51   ` [PATCH 4/7] [RFC] Allow cgroup hierarchies to be created with no bound subsystems Paul Menage
     [not found]     ` <20090312105137.24154.34890.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:45       ` Li Zefan
     [not found]         ` <49BF470D.8080600-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-31  8:45           ` Paul Menage
     [not found]             ` <6599ad830903310145l7b24ad0vb4462f94390ac264-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-01  6:40               ` Li Zefan
     [not found]                 ` <49D30C44.8050802-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-04-02  8:17                   ` Paul Menage
2009-03-12 10:51   ` [PATCH 5/7] [RFC] Remove cgroup_subsys.root pointer Paul Menage
2009-03-12 10:51   ` [PATCH 6/7] [RFC] Support multiply-bindable cgroup subsystems Paul Menage
     [not found]     ` <20090312105147.24154.62638.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:46       ` Li Zefan
     [not found]         ` <49BF4744.5060309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-18  2:09           ` Li Zefan
     [not found]             ` <49C057EB.1000307-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-18  2:16               ` Paul Menage
     [not found]                 ` <6599ad830903171916x7364ec7cw76975d71d5125d82-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-18  2:39                   ` Li Zefan
     [not found]                     ` <49C05EDF.5010607-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-18  2:43                       ` Paul Menage
     [not found]                         ` <6599ad830903171943x7884cb03w8f22fa1629d667b3-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-18  3:09                           ` Li Zefan
     [not found]                             ` <49C065E7.8060901-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2009-03-18  5:43                               ` Balbir Singh
2009-03-31  9:02                               ` Paul Menage
     [not found]                                 ` <6599ad830903310202p1c268237lff283b2676f78864-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-01  6:44                                   ` Li Zefan
2009-03-12 10:51   ` [PATCH 7/7] [RFC] Example multi-bindable subsystem: a per-cgroup notes field Paul Menage
     [not found]     ` <20090312105153.24154.29389.stgit-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2009-03-17  6:46       ` Li Zefan
2009-03-16  1:10   ` [PATCH 0/7][RFC] CGroup hierarchy extensions KAMEZAWA Hiroyuki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.