linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrea Righi <righi.andrea@gmail.com>
To: Vivek Goyal <vgoyal@redhat.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Paul Menage <menage@google.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: linux kernel mailing list <linux-kernel@vger.kernel.org>,
	Dhaval Giani <dhaval@linux.vnet.ibm.com>,
	Kazunaga Ikeno <k-ikeno@ak.jp.nec.com>,
	Morton Andrew Morton <akpm@linux-foundation.org>,
	Thomas Graf <tgraf@redhat.com>,
	Ulrich Drepper <drepper@redhat.com>
Subject: [RFC] [PATCH -mm] cgroup: uid-based rules to add processes efficiently in the right cgroup
Date: Sun, 17 Aug 2008 12:33:31 +0200	[thread overview]
Message-ID: <48A7FE7B.3060309@gmail.com> (raw)
In-Reply-To: <20080714152142.GJ16673@redhat.com>

The problem of placing tasks in respective cgroups seems to be correctly
addressed by userspace lib wrappers or classifier daemons [1].

However, this is an attempt to implement an in-kernel classifier.

[ I wrote this patch for a "special purpose" environment, where a lot of
short-lived processes belonging to different users are spawned by
different daemons, so the main goal here would be to remove the delay
needed by userspace classification and place the tasks in the right
cgroup at the time they're created. This is just an ugly hack for now
and it works only for uid-based rules; gid-based rules could be
implemented in a similar way. ]

UID:cgroup associations are stored in an RCU-protected hash list.

The kernel<->userspace interface works as follows:
 - the file "uids" is added in the cgroup filesystem
 - a UID can be placed only in a single cgroup
 - a cgroup can have multiple UIDs

Compared to the userspace solution (e.g. a classifier daemon), this
solution has the advantage of removing the delay for task classification,
which means each task always runs in the appropriate cgroup from the time
it is created (fork, exec) or when its uid changes (setuid).

OTOH, the disadvantage is that it introduces additional complexity in the kernel.

[1] http://lkml.org/lkml/2008/7/1/391

Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
---
 include/linux/cgroup.h |    9 +++
 kernel/cgroup.c        |  141 +++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/sys.c           |    6 ++-
 3 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4..243819a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -393,6 +393,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
+struct cgroup *uid_to_cgroup(uid_t uid);
 
 #else /* !CONFIG_CGROUPS */
 
@@ -411,6 +412,14 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 {
 	return -EINVAL;
 }
+static inline int cgroup_attach_task(struct cgroup *, struct task_struct *)
+{
+	return 0;
+}
+static inline struct cgroup *uid_to_cgroup(uid_t uid)
+{
+	return NULL;
+}
 
 #endif /* !CONFIG_CGROUPS */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 791246a..5a010db 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1318,6 +1318,7 @@ enum cgroup_filetype {
 	FILE_ROOT,
 	FILE_DIR,
 	FILE_TASKLIST,
+	FILE_UIDLIST,
 	FILE_NOTIFY_ON_RELEASE,
 	FILE_RELEASE_AGENT,
 };
@@ -2203,6 +2204,131 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
 	return 0;
 }
 
+#define CGROUP_UID_HASH_SHIFT	9
+#define CGROUP_UID_HASH_SIZE	(1UL << CGROUP_UID_HASH_SHIFT)
+#define cgroup_uid_hashfn(__uid) \
+		hash_long((unsigned long)__uid, CGROUP_UID_HASH_SHIFT)
+
+struct cgroup_uid {
+	uid_t uid;
+	struct cgroup *cgroup;
+	struct hlist_node cgroup_uid_chain;
+};
+
+/* hash list to store uid:cgroup associations (protected by RCU locking) */
+static struct hlist_head *cgroup_uids;
+
+/* spinlock to protect cgroup_uids write operations */
+static __cacheline_aligned DEFINE_SPINLOCK(cgroup_uid_lock);
+
+/*
+ * Note: called with rcu_read_lock() held.
+ */
+static struct cgroup_uid *cgroup_uid_find_item(uid_t uid)
+{
+	struct hlist_node *item;
+	struct cgroup_uid *u;
+
+	hlist_for_each_entry_rcu(u, item, &cgroup_uids[cgroup_uid_hashfn(uid)],
+			cgroup_uid_chain)
+		if (u->uid == uid)
+			return u;
+	return NULL;
+}
+
+struct cgroup *uid_to_cgroup(uid_t uid)
+{
+	struct cgroup_uid *cu;
+	struct cgroup *ret;
+
+	rcu_read_lock();
+	cu = cgroup_uid_find_item(uid);
+	ret = cu ? cu->cgroup : NULL;
+	rcu_read_unlock();
+	return ret;
+}
+
+static int cgroup_uid_read(struct cgroup *cgrp, struct cftype *cft,
+				struct seq_file *m)
+{
+	struct hlist_node *item;
+	struct cgroup_uid *u;
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < CGROUP_UID_HASH_SIZE; i++)
+		hlist_for_each_entry_rcu(u, item, &cgroup_uids[i],
+				cgroup_uid_chain)
+			if (u->cgroup == cgrp)
+				seq_printf(m, "%u\n", u->uid);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int cgroup_uid_write(struct cgroup *cgrp, struct cftype *cft, u64 uid)
+{
+	struct cgroup_uid *u, *old_u;
+
+	u = kmalloc(sizeof(*u), GFP_KERNEL);
+	if (unlikely(!u))
+		return -ENOMEM;
+	u->uid = (uid_t)uid;
+	u->cgroup = cgrp;
+
+	spin_lock_irq(&cgroup_uid_lock);
+	old_u = cgroup_uid_find_item(uid);
+	if (old_u) {
+		/* Replace old element with newer */
+		hlist_replace_rcu(&old_u->cgroup_uid_chain,
+				&u->cgroup_uid_chain);
+		spin_unlock_irq(&cgroup_uid_lock);
+		synchronize_rcu();
+		kfree(old_u);
+		return 0;
+	}
+	/* Add the new element to the cgroup uid hash list */
+	hlist_add_head_rcu(&u->cgroup_uid_chain,
+			&cgroup_uids[cgroup_uid_hashfn(uid)]);
+	spin_unlock_irq(&cgroup_uid_lock);
+	return 0;
+}
+
+static int cgroup_uid_cleanup(struct cgroup *cgrp)
+{
+	HLIST_HEAD(old_items);
+	struct hlist_node *item, *n;
+	struct cgroup_uid *u;
+	int i;
+
+	spin_lock_irq(&cgroup_uid_lock);
+	for (i = 0; i < CGROUP_UID_HASH_SIZE; i++)
+		hlist_for_each_entry_safe(u, item, n, &cgroup_uids[i],
+				cgroup_uid_chain)
+			if (u->cgroup == cgrp) {
+				hlist_del_rcu(&u->cgroup_uid_chain);
+				hlist_add_head(&u->cgroup_uid_chain,
+						&old_items);
+			}
+	spin_unlock_irq(&cgroup_uid_lock);
+	synchronize_rcu();
+	hlist_for_each_entry_safe(u, item, n, &old_items, cgroup_uid_chain)
+		kfree(u);
+	return 0;
+}
+
+static int __init init_cgroup_uid(void)
+{
+	int i;
+
+	cgroup_uids = kmalloc(sizeof(*cgroup_uids) * CGROUP_UID_HASH_SIZE,
+				GFP_KERNEL);
+	if (unlikely(!cgroup_uids))
+		return -ENOMEM;
+	for (i = 0; i < CGROUP_UID_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&cgroup_uids[i]);
+	return 0;
+}
+
 /*
  * for the common functions, 'private' gives the type of file
  */
@@ -2215,7 +2341,12 @@ static struct cftype files[] = {
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
 	},
-
+	{
+		.name = "uids",
+		.read_seq_string = cgroup_uid_read,
+		.write_u64 = cgroup_uid_write,
+		.private = FILE_UIDLIST,
+	},
 	{
 		.name = "notify_on_release",
 		.read_u64 = cgroup_read_notify_on_release,
@@ -2434,6 +2565,8 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		return -EBUSY;
 	}
 
+	cgroup_uid_cleanup(cgrp);
+
 	spin_lock(&release_list_lock);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 	if (!list_empty(&cgrp->release_list))
@@ -2550,6 +2683,8 @@ int __init cgroup_init(void)
 	if (err)
 		return err;
 
+	init_cgroup_uid();
+
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (!ss->early_init)
@@ -2700,11 +2835,15 @@ static struct file_operations proc_cgroupstats_operations = {
  */
 void cgroup_fork(struct task_struct *child)
 {
+	struct cgroup *cgrp = uid_to_cgroup(child->uid);
+
 	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
 	task_unlock(current);
 	INIT_LIST_HEAD(&child->cg_list);
+	if (cgrp)
+		cgroup_attach_task(cgrp, child);
 }
 
 /**
diff --git a/kernel/sys.c b/kernel/sys.c
index c018580..d22e815 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -19,6 +19,7 @@
 #include <linux/kexec.h>
 #include <linux/workqueue.h>
 #include <linux/capability.h>
+#include <linux/cgroup.h>
 #include <linux/device.h>
 #include <linux/key.h>
 #include <linux/times.h>
@@ -548,10 +549,11 @@ asmlinkage long sys_setgid(gid_t gid)
 	proc_id_connector(current, PROC_EVENT_GID);
 	return 0;
 }
-  
+
 static int set_user(uid_t new_ruid, int dumpclear)
 {
 	struct user_struct *new_user;
+	struct cgroup *cgrp = uid_to_cgroup(new_ruid);
 
 	new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
 	if (!new_user)
@@ -571,6 +573,8 @@ static int set_user(uid_t new_ruid, int dumpclear)
 		smp_wmb();
 	}
 	current->uid = new_ruid;
+	if (cgrp)
+		cgroup_attach_task(cgrp, current);
 	return 0;
 }
 

  parent reply	other threads:[~2008-08-17 10:33 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-01 19:11 [RFC] How to handle the rules engine for cgroups Vivek Goyal
2008-07-02  9:33 ` Kazunaga Ikeno
2008-07-03  1:19 ` KAMEZAWA Hiroyuki
2008-07-03 15:54   ` Vivek Goyal
2008-07-04  0:34     ` KAMEZAWA Hiroyuki
2008-07-04  3:17     ` Li Zefan
2008-07-08  9:35     ` Balbir Singh
2008-07-08 13:45       ` Vivek Goyal
2008-07-10  9:23     ` Paul Menage
2008-07-10 14:30       ` Vivek Goyal
2008-07-10 15:42         ` Dhaval Giani
2008-07-10 16:51         ` Paul Menage
2008-07-10 14:48       ` Rik van Riel
2008-07-10 15:40         ` Vivek Goyal
2008-07-10 15:56           ` Ulrich Drepper
2008-07-10 17:25             ` Rik van Riel
2008-07-10 17:39               ` Ulrich Drepper
2008-07-10 18:41                 ` Vivek Goyal
2008-07-10 22:29                   ` Ulrich Drepper
2008-07-11  0:55           ` KAMEZAWA Hiroyuki
2008-07-14 13:57             ` Vivek Goyal
2008-07-14 14:44               ` David Collier-Brown
2008-07-14 15:21                 ` Vivek Goyal
2008-07-17  7:05                   ` Kazunaga Ikeno
2008-07-17 13:47                     ` Vivek Goyal
     [not found]                       ` <20080717170717.GA3718@linux.vnet.ibm.com>
2008-07-18  8:12                         ` [Libcg-devel] " Dhaval Giani
2008-07-18 20:12                           ` Vivek Goyal
2008-08-17 10:33                   ` Andrea Righi [this message]
2008-08-18 12:35                     ` [RFC] [PATCH -mm] cgroup: uid-based rules to add processes efficiently in the right cgroup Vivek Goyal
2008-08-19 14:35                       ` righi.andrea
2008-08-18 21:05                     ` Paul Menage
2008-08-19 12:57                       ` Vivek Goyal
2008-08-26  0:54                         ` Paul Menage
2008-08-26 13:41                           ` Vivek Goyal
2008-08-26 14:35                             ` Balbir Singh
2008-08-26 15:04                               ` David Collier-Brown
2008-08-26 16:00                                 ` Vivek Goyal
2008-08-26 16:32                                   ` David Collier-Brown
2008-08-26 16:08                               ` Vivek Goyal
2008-09-04 18:25                             ` Paul Menage
2008-08-19 15:12                       ` righi.andrea
2008-08-26  0:55                         ` Paul Menage
2008-07-14 15:07             ` Re: [RFC] How to handle the rules engine for cgroups kamezawa.hiroyu
2008-07-10  9:07 ` Paul Menage
2008-07-10 14:06   ` Vivek Goyal
2008-07-10 16:41     ` Paul Menage
2008-07-10 17:19       ` Vivek Goyal
2008-07-10 17:27         ` [Libcg-devel] " Dhaval Giani
2008-07-10 14:33   ` Vivek Goyal
2008-07-10 16:46     ` Paul Menage
2008-07-10 17:18       ` [Libcg-devel] " Dhaval Giani
2008-07-10 17:30         ` Paul Menage
2008-07-10 17:44           ` Dhaval Giani
2008-07-10 15:49   ` Dhaval Giani
2008-07-18  9:52 ` KAMEZAWA Hiroyuki
2008-07-18 15:46   ` Paul Menage
2008-07-18 16:39   ` Balbir Singh
2008-07-18 18:55     ` Vivek Goyal
2008-07-18 23:05   ` kamezawa.hiroyu
2008-07-18 23:10   ` kamezawa.hiroyu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48A7FE7B.3060309@gmail.com \
    --to=righi.andrea@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=drepper@redhat.com \
    --cc=k-ikeno@ak.jp.nec.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=tgraf@redhat.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).