All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch -mm 1/4] mqueue namespace : add struct mq_namespace
       [not found] <20071128163728.177495768@fr.ibm.com>
@ 2007-11-28 16:37 ` Cedric Le Goater
  2007-11-28 16:37 ` [patch -mm 2/4] mqueue namespace : add unshare support Cedric Le Goater
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-28 16:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Containers, Cedric Le Goater, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelianov

[-- Attachment #1: mq_namespace-add-mq_namespace.patch --]
[-- Type: text/plain, Size: 13443 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

This patch adds a struct mq_namespace holding the common attributes 
of the mqueue namespace. 

The current code is modified to use the default mqueue namespace 
object 'init_mq_ns' and to prepare the ground for future dynamic 
objects.

A new option CONFIG_MQ_NS keeps the namespace code out of 
configurations that do not use namespaces.

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/mq_namespace.h |   67 +++++++++++++++++++++++++++
 init/Kconfig                 |    9 +++
 ipc/Makefile                 |    1 
 ipc/mq_namespace.c           |   23 +++++++++
 ipc/mqueue.c                 |  103 ++++++++++++++++++++++---------------------
 5 files changed, 155 insertions(+), 48 deletions(-)

Index: 2.6.24-rc3-mm2/include/linux/mq_namespace.h
===================================================================
--- /dev/null
+++ 2.6.24-rc3-mm2/include/linux/mq_namespace.h
@@ -0,0 +1,67 @@
+#ifndef _LINUX_MQ_NAMESPACE_H
+#define _LINUX_MQ_NAMESPACE_H
+
+#include <linux/kref.h>
+
+struct vfsmount;
+
+struct mq_namespace {
+	struct kref	kref;
+	struct vfsmount *mnt;
+
+	unsigned int	queues_count;
+	unsigned int	queues_max;
+	unsigned int	msg_max;
+	unsigned int	msgsize_max;
+};
+
+extern struct mq_namespace init_mq_ns;
+
+/* default values */
+#define DFLT_QUEUESMAX	256	/* max number of message queues */
+#define DFLT_MSGMAX 	10	/* max number of messages in each queue */
+#define HARD_MSGMAX 	(131072/sizeof(void *))
+#define DFLT_MSGSIZEMAX 8192	/* max message size */
+
+#ifdef CONFIG_POSIX_MQUEUE
+#define INIT_MQ_NS(ns)		.ns		= &init_mq_ns,
+#else
+#define INIT_MQ_NS(ns)
+#endif
+
+#if defined(CONFIG_POSIX_MQUEUE) && defined(CONFIG_MQ_NS)
+static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+	return ns;
+}
+
+extern struct mq_namespace *copy_mq_ns(unsigned long flags,
+				struct mq_namespace *old_ns);
+extern void free_mq_ns(struct kref *kref);
+
+static inline void put_mq_ns(struct mq_namespace *ns)
+{
+	if (ns)
+		kref_put(&ns->kref, free_mq_ns);
+}
+
+#else
+
+static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
+{
+	return ns;
+}
+
+static inline struct mq_namespace *copy_mq_ns(unsigned long flags,
+					struct mq_namespace *old_ns)
+{
+	return old_ns;
+}
+
+static inline void put_mq_ns(struct mq_namespace *ns) { }
+
+#endif /* CONFIG_POSIX_MQUEUE */
+
+#endif /* _LINUX_MQ_H */
Index: 2.6.24-rc3-mm2/ipc/mqueue.c
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/mqueue.c
+++ 2.6.24-rc3-mm2/ipc/mqueue.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>
 #include <linux/pid.h>
+#include <linux/mq_namespace.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -46,13 +47,6 @@
 #define STATE_PENDING	1
 #define STATE_READY	2
 
-/* default values */
-#define DFLT_QUEUESMAX	256	/* max number of message queues */
-#define DFLT_MSGMAX 	10	/* max number of messages in each queue */
-#define HARD_MSGMAX 	(131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 8192	/* max message size */
-
-
 struct ext_wait_queue {		/* queue of sleeping tasks */
 	struct task_struct *task;
 	struct list_head list;
@@ -87,12 +81,18 @@ static void remove_notification(struct m
 
 static spinlock_t mq_lock;
 static struct kmem_cache *mqueue_inode_cachep;
-static struct vfsmount *mqueue_mnt;
 
-static unsigned int queues_count;
-static unsigned int queues_max 	= DFLT_QUEUESMAX;
-static unsigned int msg_max 	= DFLT_MSGMAX;
-static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
+struct mq_namespace init_mq_ns = {
+	.kref = {
+		.refcount = ATOMIC_INIT(2),
+	},
+	.mnt		= NULL,
+	.queues_count	= 0,
+	.queues_max 	= DFLT_QUEUESMAX,
+	.msg_max 	= DFLT_MSGMAX,
+	.msgsize_max	= DFLT_MSGSIZEMAX,
+};
+
 
 static struct ctl_table_header * mq_sysctl_table;
 
@@ -235,6 +235,7 @@ static void mqueue_delete_inode(struct i
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
@@ -255,7 +256,7 @@ static void mqueue_delete_inode(struct i
 	if (user) {
 		spin_lock(&mq_lock);
 		user->mq_bytes -= mq_bytes;
-		queues_count--;
+		mq_ns->queues_count--;
 		spin_unlock(&mq_lock);
 		free_uid(user);
 	}
@@ -267,20 +268,22 @@ static int mqueue_create(struct inode *d
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	spin_lock(&mq_lock);
-	if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
+	if (mq_ns->queues_count >= mq_ns->queues_max &&
+		!capable(CAP_SYS_RESOURCE)) {
 		error = -ENOSPC;
 		goto out_lock;
 	}
-	queues_count++;
+	mq_ns->queues_count++;
 	spin_unlock(&mq_lock);
 
 	inode = mqueue_get_inode(dir->i_sb, mode, attr);
 	if (!inode) {
 		error = -ENOMEM;
 		spin_lock(&mq_lock);
-		queues_count--;
+		mq_ns->queues_count--;
 		goto out_lock;
 	}
 
@@ -570,7 +573,7 @@ static void remove_notification(struct m
 	info->notify_owner = NULL;
 }
 
-static int mq_attr_ok(struct mq_attr *attr)
+static int mq_attr_ok(struct mq_namespace *mq_ns, struct mq_attr *attr)
 {
 	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
 		return 0;
@@ -578,8 +581,8 @@ static int mq_attr_ok(struct mq_attr *at
 		if (attr->mq_maxmsg > HARD_MSGMAX)
 			return 0;
 	} else {
-		if (attr->mq_maxmsg > msg_max ||
-				attr->mq_msgsize > msgsize_max)
+		if (attr->mq_maxmsg > mq_ns->msg_max ||
+				attr->mq_msgsize > mq_ns->msgsize_max)
 			return 0;
 	}
 	/* check for overflow */
@@ -595,8 +598,9 @@ static int mq_attr_ok(struct mq_attr *at
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
-static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-			int oflag, mode_t mode, struct mq_attr __user *u_attr)
+static struct file *do_create(struct mq_namespace *mq_ns, struct dentry *dir,
+			struct dentry *dentry, int oflag, mode_t mode,
+			struct mq_attr __user *u_attr)
 {
 	struct mq_attr attr;
 	int ret;
@@ -606,7 +610,7 @@ static struct file *do_create(struct den
 		if (copy_from_user(&attr, u_attr, sizeof(attr)))
 			goto out;
 		ret = -EINVAL;
-		if (!mq_attr_ok(&attr))
+		if (!mq_attr_ok(mq_ns, &attr))
 			goto out;
 		/* store for use during create */
 		dentry->d_fsdata = &attr;
@@ -618,33 +622,34 @@ static struct file *do_create(struct den
 	if (ret)
 		goto out;
 
-	return dentry_open(dentry, mqueue_mnt, oflag);
+	return dentry_open(dentry, mq_ns->mnt, oflag);
 
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(mq_ns->mnt);
 	return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
-static struct file *do_open(struct dentry *dentry, int oflag)
+static struct file *do_open(struct mq_namespace *mq_ns, struct dentry *dentry,
+			int oflag)
 {
 static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 					MAY_READ | MAY_WRITE };
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(mq_ns->mnt);
 		return ERR_PTR(-EINVAL);
 	}
 
 	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(mq_ns->mnt);
 		return ERR_PTR(-EACCES);
 	}
 
-	return dentry_open(dentry, mqueue_mnt, oflag);
+	return dentry_open(dentry, mq_ns->mnt, oflag);
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
@@ -654,6 +659,7 @@ asmlinkage long sys_mq_open(const char _
 	struct file *filp;
 	char *name;
 	int fd, error;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -666,13 +672,13 @@ asmlinkage long sys_mq_open(const char _
 	if (fd < 0)
 		goto out_putname;
 
-	mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	mutex_lock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
+	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
 		goto out_err;
 	}
-	mntget(mqueue_mnt);
+	mntget(mq_ns->mnt);
 
 	if (oflag & O_CREAT) {
 		if (dentry->d_inode) {	/* entry already exists */
@@ -680,12 +686,12 @@ asmlinkage long sys_mq_open(const char _
 			error = -EEXIST;
 			if (oflag & O_EXCL)
 				goto out;
-			filp = do_open(dentry, oflag);
+			filp = do_open(mq_ns, dentry, oflag);
 		} else {
-			error = mnt_want_write(mqueue_mnt);
+			error = mnt_want_write(mq_ns->mnt);
 			if (error)
 				goto out;
-			filp = do_create(mqueue_mnt->mnt_root, dentry,
+			filp = do_create(mq_ns, mq_ns->mnt->mnt_root, dentry,
 						oflag, mode, u_attr);
 		}
 	} else {
@@ -693,7 +699,7 @@ asmlinkage long sys_mq_open(const char _
 		if (!dentry->d_inode)
 			goto out;
 		audit_inode(name, dentry);
-		filp = do_open(dentry, oflag);
+		filp = do_open(mq_ns, dentry, oflag);
 	}
 
 	if (IS_ERR(filp)) {
@@ -707,13 +713,13 @@ asmlinkage long sys_mq_open(const char _
 
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(mq_ns->mnt);
 out_putfd:
 	put_unused_fd(fd);
 out_err:
 	fd = error;
 out_upsem:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
 out_putname:
 	putname(name);
 	return fd;
@@ -725,14 +731,15 @@ asmlinkage long sys_mq_unlink(const char
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+	mutex_lock_nested(&mq_ns->mnt->mnt_root->d_inode->i_mutex,
 			I_MUTEX_PARENT);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
 		goto out_unlock;
@@ -746,16 +753,16 @@ asmlinkage long sys_mq_unlink(const char
 	inode = dentry->d_inode;
 	if (inode)
 		atomic_inc(&inode->i_count);
-	err = mnt_want_write(mqueue_mnt);
+	err = mnt_want_write(mq_ns->mnt);
 	if (err)
 		goto out_err;
 	err = vfs_unlink(dentry->d_parent->d_inode, dentry);
-	mnt_drop_write(mqueue_mnt);
+	mnt_drop_write(mq_ns->mnt);
 out_err:
 	dput(dentry);
 
 out_unlock:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
 	putname(name);
 	if (inode)
 		iput(inode);
@@ -1204,14 +1211,14 @@ static int msg_maxsize_limit_max = INT_M
 static ctl_table mq_sysctls[] = {
 	{
 		.procname	= "queues_max",
-		.data		= &queues_max,
+		.data		= &init_mq_ns.queues_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.procname	= "msg_max",
-		.data		= &msg_max,
+		.data		= &init_mq_ns.msg_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1220,7 +1227,7 @@ static ctl_table mq_sysctls[] = {
 	},
 	{
 		.procname	= "msgsize_max",
-		.data		= &msgsize_max,
+		.data		= &init_mq_ns.msgsize_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1266,13 +1273,13 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
-		error = PTR_ERR(mqueue_mnt);
+	init_mq_ns.mnt = kern_mount(&mqueue_fs_type);
+	if (IS_ERR(init_mq_ns.mnt)) {
+		error = PTR_ERR(init_mq_ns.mnt);
 		goto out_filesystem;
 	}
 
 	/* internal initialization - not common for vfs */
-	queues_count = 0;
 	spin_lock_init(&mq_lock);
 
 	return 0;
Index: 2.6.24-rc3-mm2/init/Kconfig
===================================================================
--- 2.6.24-rc3-mm2.orig/init/Kconfig
+++ 2.6.24-rc3-mm2/init/Kconfig
@@ -426,6 +426,15 @@ config PID_NS
 	  Unless you want to work with an experimental feature
 	  say N here.
 
+config MQ_NS
+       bool "POSIX Message Queues namespace"
+       depends on NAMESPACES && POSIX_MQUEUE
+       help
+	 Support for POSIX Message Queues namespaces. This allows
+	 having different POSIX Message Queues filesystems containing
+	 message queues with the same name. Yet another building
+	 block of containers.
+
 config BLK_DEV_INITRD
 	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
 	depends on BROKEN || !FRV
Index: 2.6.24-rc3-mm2/ipc/Makefile
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/Makefile
+++ 2.6.24-rc3-mm2/ipc/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysc
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
 obj-$(CONFIG_IPC_NS) += namespace.o
+obj-$(CONFIG_MQ_NS) += mq_namespace.o
 
Index: 2.6.24-rc3-mm2/ipc/mq_namespace.c
===================================================================
--- /dev/null
+++ 2.6.24-rc3-mm2/ipc/mq_namespace.c
@@ -0,0 +1,23 @@
+/*
+ *  Copyright (C) 2007 IBM Corporation
+ *
+ *  Author: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ */
+
+#include <linux/mq_namespace.h>
+
+struct mq_namespace *copy_mq_ns(unsigned long flags,
+				struct mq_namespace *old_ns)
+{
+	BUG_ON(!old_ns);
+	return get_mq_ns(old_ns);
+}
+
+void free_mq_ns(struct kref *kref)
+{
+}

-- 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [patch -mm 2/4] mqueue namespace : add unshare support
       [not found] <20071128163728.177495768@fr.ibm.com>
  2007-11-28 16:37 ` [patch -mm 1/4] mqueue namespace : add struct mq_namespace Cedric Le Goater
@ 2007-11-28 16:37 ` Cedric Le Goater
  2007-11-28 16:37 ` [patch -mm 3/4] mqueue namespace : enable the mqueue namespace Cedric Le Goater
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-28 16:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Containers, Cedric Le Goater, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelianov

[-- Attachment #1: mq_namespace-add-mq_namespace-to-nsproxy.patch --]
[-- Type: text/plain, Size: 7297 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

This patch includes the mqueue namespace in the nsproxy object. It  
also adds the support of unshare() and clone() with a new clone flag 
CLONE_NEWMQ (1 bit left in the clone flags !)

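A new mqueue namespace must always be created together with a new 
mount namespace: clone() rejects CLONE_NEWMQ without CLONE_NEWNS, and 
unshare() with CLONE_NEWMQ implies CLONE_NEWNS. This is to make sure 
that no user mounts of the internal mqueue fs are left behind when 
the last task exits. 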

It's totally harmless for the moment because the current code still 
uses the default mqueue namespace object 'init_mq_ns' 

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/init_task.h    |    2 ++
 include/linux/mq_namespace.h |    4 ++++
 include/linux/nsproxy.h      |    2 ++
 include/linux/sched.h        |    1 +
 ipc/mq_namespace.c           |   36 +++++++++++++++++++++++++++++++++++-
 kernel/fork.c                |   15 ++++++++++++++-
 kernel/nsproxy.c             |   16 ++++++++++++++--
 7 files changed, 72 insertions(+), 4 deletions(-)

Index: 2.6.24-rc3-mm2/include/linux/init_task.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/init_task.h
+++ 2.6.24-rc3-mm2/include/linux/init_task.h
@@ -10,6 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <net/net_namespace.h>
+#include <linux/mq_namespace.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
 	INIT_NET_NS(net_ns)                                             \
 	INIT_IPC_NS(ipc_ns)						\
 	.user_ns	= &init_user_ns,				\
+	INIT_MQ_NS(mq_ns)						\
 }
 
 #define INIT_SIGHAND(sighand) {						\
Index: 2.6.24-rc3-mm2/include/linux/sched.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/sched.h
+++ 2.6.24-rc3-mm2/include/linux/sched.h
@@ -27,6 +27,7 @@
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
 #define CLONE_NEWPID		0x20000000	/* New pid namespace */
 #define CLONE_NEWNET		0x40000000	/* New network namespace */
+#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
 
 /*
  * Scheduling policies
Index: 2.6.24-rc3-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.24-rc3-mm2.orig/kernel/nsproxy.c
+++ 2.6.24-rc3-mm2/kernel/nsproxy.c
@@ -93,8 +93,17 @@ static struct nsproxy *create_new_namesp
 		goto out_net;
 	}
 
+	new_nsp->mq_ns = copy_mq_ns(flags, tsk->nsproxy->mq_ns);
+	if (IS_ERR(new_nsp->mq_ns)) {
+		err = PTR_ERR(new_nsp->mq_ns);
+		goto out_mq;
+	}
+
 	return new_nsp;
 
+out_mq:	/* copy_mq_ns() failed: drop the net_ns taken above */
+	if (new_nsp->net_ns)
+		put_net(new_nsp->net_ns);
 out_net:
 	if (new_nsp->user_ns)
 		put_user_ns(new_nsp->user_ns);
@@ -131,7 +140,8 @@ int copy_namespaces(unsigned long flags,
 	get_nsproxy(old_ns);
 
 	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
+				CLONE_NEWUSER | CLONE_NEWPID |
+				CLONE_NEWNET | CLONE_NEWMQ)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -170,6 +180,8 @@ void free_nsproxy(struct nsproxy *ns)
 		put_pid_ns(ns->pid_ns);
 	if (ns->user_ns)
 		put_user_ns(ns->user_ns);
+	if (ns->mq_ns)
+		put_mq_ns(ns->mq_ns);
 	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
 }
@@ -184,7 +196,7 @@ int unshare_nsproxy_namespaces(unsigned 
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWUSER | CLONE_NEWNET)))
+			       CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWMQ)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
Index: 2.6.24-rc3-mm2/kernel/fork.c
===================================================================
--- 2.6.24-rc3-mm2.orig/kernel/fork.c
+++ 2.6.24-rc3-mm2/kernel/fork.c
@@ -1004,6 +1004,13 @@ static struct task_struct *copy_process(
 	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * the mqueue namespace cannot be unshared when the mount
+	 * namespace is not
+	 */
+	if ((clone_flags & CLONE_NEWMQ) && !(clone_flags & CLONE_NEWNS))
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -1570,6 +1577,12 @@ static void check_unshare_flags(unsigned
 		*flags_ptr |= CLONE_THREAD;
 
 	/*
+	 * If unsharing mqueue namespace, must also unshare mnt namespace.
+	 */
+	if (*flags_ptr & CLONE_NEWMQ)
+		*flags_ptr |= CLONE_NEWNS;
+
+	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
 	if (*flags_ptr & CLONE_NEWNS)
@@ -1687,7 +1700,7 @@ asmlinkage long sys_unshare(unsigned lon
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
-				CLONE_NEWNET))
+				CLONE_NEWNET|CLONE_NEWMQ))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
Index: 2.6.24-rc3-mm2/include/linux/nsproxy.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/nsproxy.h
+++ 2.6.24-rc3-mm2/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
 struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
+struct mq_namespace;
 
 /*
  * A structure to contain pointers to all per-process
@@ -29,6 +30,7 @@ struct nsproxy {
 	struct pid_namespace *pid_ns;
 	struct user_namespace *user_ns;
 	struct net 	     *net_ns;
+	struct mq_namespace *mq_ns;
 };
 extern struct nsproxy init_nsproxy;
 
Index: 2.6.24-rc3-mm2/ipc/mq_namespace.c
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/mq_namespace.c
+++ 2.6.24-rc3-mm2/ipc/mq_namespace.c
@@ -10,14 +10,48 @@
  */
 
 #include <linux/mq_namespace.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static struct mq_namespace *clone_mq_ns(struct mq_namespace *old_ns)
+{
+	struct mq_namespace *mq_ns;
+
+	mq_ns = kmalloc(sizeof(struct mq_namespace), GFP_KERNEL);
+	if (!mq_ns)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&mq_ns->kref);
+	mq_ns->queues_count	= 0;
+	mq_ns->queues_max	= DFLT_QUEUESMAX;
+	mq_ns->msg_max		= DFLT_MSGMAX;
+	mq_ns->msgsize_max	= DFLT_MSGSIZEMAX;
+	mq_ns->mnt		= NULL;
+	return mq_ns;
+}
 
 struct mq_namespace *copy_mq_ns(unsigned long flags,
 				struct mq_namespace *old_ns)
 {
+	struct mq_namespace *mq_ns;
+
 	BUG_ON(!old_ns);
-	return get_mq_ns(old_ns);
+	get_mq_ns(old_ns);
+
+	if (!(flags & CLONE_NEWMQ))
+		return old_ns;
+
+	mq_ns = clone_mq_ns(old_ns);
+
+	put_mq_ns(old_ns);
+	return mq_ns;
 }
 
 void free_mq_ns(struct kref *kref)
 {
+	struct mq_namespace *ns;
+
+	ns = container_of(kref, struct mq_namespace, kref);
+	kfree(ns);
 }
Index: 2.6.24-rc3-mm2/include/linux/mq_namespace.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/mq_namespace.h
+++ 2.6.24-rc3-mm2/include/linux/mq_namespace.h
@@ -2,6 +2,7 @@
 #define _LINUX_MQ_NAMESPACE_H
 
 #include <linux/kref.h>
+#include <linux/err.h>
 
 struct vfsmount;
 
@@ -57,6 +58,9 @@ static inline struct mq_namespace *get_m
 static inline struct mq_namespace *copy_mq_ns(unsigned long flags,
 					struct mq_namespace *old_ns)
 {
+	if (flags & CLONE_NEWMQ)
+		return ERR_PTR(-EINVAL);
+
 	return old_ns;
 }
 

-- 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [patch -mm 3/4] mqueue namespace : enable the mqueue namespace
       [not found] <20071128163728.177495768@fr.ibm.com>
  2007-11-28 16:37 ` [patch -mm 1/4] mqueue namespace : add struct mq_namespace Cedric Le Goater
  2007-11-28 16:37 ` [patch -mm 2/4] mqueue namespace : add unshare support Cedric Le Goater
@ 2007-11-28 16:37 ` Cedric Le Goater
  2007-11-28 16:37 ` [patch -mm 4/4] mqueue namespace: adapt sysctl Cedric Le Goater
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-28 16:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Containers, Cedric Le Goater, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelianov

[-- Attachment #1: mq_namespace-use-mq_namespace.patch --]
[-- Type: text/plain, Size: 5013 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

Move forward and start using the mqueue namespace.

The single super block mount of the file system is modified to allow 
one mount per namespace. This is achieved by storing the namespace 
in the super_block s_fs_info attribute. 

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/mq_namespace.h |    2 ++
 ipc/mq_namespace.c           |    8 +++++++-
 ipc/mqueue.c                 |   42 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 8 deletions(-)

Index: 2.6.24-rc3-mm2/ipc/mqueue.c
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/mqueue.c
+++ 2.6.24-rc3-mm2/ipc/mqueue.c
@@ -32,6 +32,7 @@
 #include <linux/nsproxy.h>
 #include <linux/pid.h>
 #include <linux/mq_namespace.h>
+#include <linux/parser.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -200,11 +201,38 @@ static int mqueue_fill_super(struct supe
 	return 0;
 }
 
+static int compare_data(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
 static int mqueue_get_sb(struct file_system_type *fs_type,
 			 int flags, const char *dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
+	struct super_block *s;
+	int error;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
+
+	if (flags & MS_KERNMOUNT)
+		mq_ns = (struct mq_namespace *) data;
+
+	s = sget(fs_type, compare_data, set_anon_super, mq_ns);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+	if (!s->s_root) {
+		s->s_flags = flags;
+		s->s_fs_info = mq_ns;
+		error = mqueue_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			return error;
+		}
+		s->s_flags |= MS_ACTIVE;
+	}
+	do_remount_sb(s, flags, data, 0);
+	return simple_set_mnt(mnt, s);
 }
 
 static void init_once(struct kmem_cache *cachep, void *foo)
@@ -235,7 +263,7 @@ static void mqueue_delete_inode(struct i
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = inode->i_sb->s_fs_info;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
@@ -268,7 +296,7 @@ static int mqueue_create(struct inode *d
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = dir->i_sb->s_fs_info;
 
 	spin_lock(&mq_lock);
 	if (mq_ns->queues_count >= mq_ns->queues_max &&
@@ -659,7 +687,7 @@ asmlinkage long sys_mq_open(const char _
 	struct file *filp;
 	char *name;
 	int fd, error;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -731,7 +759,7 @@ asmlinkage long sys_mq_unlink(const char
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
@@ -1196,7 +1224,7 @@ static struct super_operations mqueue_su
 	.drop_inode = generic_delete_inode,
 };
 
-static struct file_system_type mqueue_fs_type = {
+struct file_system_type mqueue_fs_type = {
 	.name = "mqueue",
 	.get_sb = mqueue_get_sb,
 	.kill_sb = kill_litter_super,
@@ -1273,7 +1301,7 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	init_mq_ns.mnt = kern_mount(&mqueue_fs_type);
+	init_mq_ns.mnt = kern_mount_data(&mqueue_fs_type, &init_mq_ns);
 	if (IS_ERR(init_mq_ns.mnt)) {
 		error = PTR_ERR(init_mq_ns.mnt);
 		goto out_filesystem;
Index: 2.6.24-rc3-mm2/include/linux/mq_namespace.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/mq_namespace.h
+++ 2.6.24-rc3-mm2/include/linux/mq_namespace.h
@@ -3,6 +3,7 @@
 
 #include <linux/kref.h>
 #include <linux/err.h>
+#include <linux/fs.h>
 
 struct vfsmount;
 
@@ -17,6 +18,7 @@ struct mq_namespace {
 };
 
 extern struct mq_namespace init_mq_ns;
+extern struct file_system_type mqueue_fs_type;
 
 /* default values */
 #define DFLT_QUEUESMAX	256	/* max number of message queues */
Index: 2.6.24-rc3-mm2/ipc/mq_namespace.c
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/mq_namespace.c
+++ 2.6.24-rc3-mm2/ipc/mq_namespace.c
@@ -27,7 +27,12 @@ static struct mq_namespace *clone_mq_ns(
 	mq_ns->queues_max	= DFLT_QUEUESMAX;
 	mq_ns->msg_max		= DFLT_MSGMAX;
 	mq_ns->msgsize_max	= DFLT_MSGSIZEMAX;
-	mq_ns->mnt		= NULL;
+	mq_ns->mnt		= kern_mount_data(&mqueue_fs_type, mq_ns);
+	if (IS_ERR(mq_ns->mnt)) {
+		void *error = mq_ns->mnt;
+		kfree(mq_ns);
+		return error;
+	}
 	return mq_ns;
 }
 
@@ -53,5 +58,6 @@ void free_mq_ns(struct kref *kref)
 	struct mq_namespace *ns;
 
 	ns = container_of(kref, struct mq_namespace, kref);
+	mntput(ns->mnt);
 	kfree(ns);
 }

-- 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [patch -mm 4/4] mqueue namespace: adapt sysctl
       [not found] <20071128163728.177495768@fr.ibm.com>
                   ` (2 preceding siblings ...)
  2007-11-28 16:37 ` [patch -mm 3/4] mqueue namespace : enable the mqueue namespace Cedric Le Goater
@ 2007-11-28 16:37 ` Cedric Le Goater
       [not found] ` <20071128163728.177495768-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-28 16:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Containers, Cedric Le Goater, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelianov

[-- Attachment #1: mq_namespace-fix-sysctl.patch --]
[-- Type: text/plain, Size: 6908 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

Largely inspired by ipc/ipc_sysctl.c. This patch isolates the mqueue 
sysctl code in its own file.

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/mq_namespace.h |   10 +++
 init/Kconfig                 |    6 ++
 ipc/Makefile                 |    1 
 ipc/mq_sysctl.c              |  117 +++++++++++++++++++++++++++++++++++++++++++
 ipc/mqueue.c                 |   56 --------------------
 5 files changed, 135 insertions(+), 55 deletions(-)

Index: 2.6.24-rc3-mm2/ipc/mqueue.c
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/mqueue.c
+++ 2.6.24-rc3-mm2/ipc/mqueue.c
@@ -1230,60 +1230,6 @@ struct file_system_type mqueue_fs_type =
 	.kill_sb = kill_litter_super,
 };
 
-static int msg_max_limit_min = DFLT_MSGMAX;
-static int msg_max_limit_max = HARD_MSGMAX;
-
-static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
-static int msg_maxsize_limit_max = INT_MAX;
-
-static ctl_table mq_sysctls[] = {
-	{
-		.procname	= "queues_max",
-		.data		= &init_mq_ns.queues_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.procname	= "msg_max",
-		.data		= &init_mq_ns.msg_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.extra1		= &msg_max_limit_min,
-		.extra2		= &msg_max_limit_max,
-	},
-	{
-		.procname	= "msgsize_max",
-		.data		= &init_mq_ns.msgsize_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.extra1		= &msg_maxsize_limit_min,
-		.extra2		= &msg_maxsize_limit_max,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table mq_sysctl_dir[] = {
-	{
-		.procname	= "mqueue",
-		.mode		= 0555,
-		.child		= mq_sysctls,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table mq_sysctl_root[] = {
-	{
-		.ctl_name	= CTL_FS,
-		.procname	= "fs",
-		.mode		= 0555,
-		.child		= mq_sysctl_dir,
-	},
-	{ .ctl_name = 0 }
-};
-
 static int __init init_mqueue_fs(void)
 {
 	int error;
@@ -1295,7 +1241,7 @@ static int __init init_mqueue_fs(void)
 		return -ENOMEM;
 
 	/* ignore failues - they are not fatal */
-	mq_sysctl_table = register_sysctl_table(mq_sysctl_root);
+	mq_sysctl_table = mq_register_sysctl_table();
 
 	error = register_filesystem(&mqueue_fs_type);
 	if (error)
Index: 2.6.24-rc3-mm2/init/Kconfig
===================================================================
--- 2.6.24-rc3-mm2.orig/init/Kconfig
+++ 2.6.24-rc3-mm2/init/Kconfig
@@ -139,6 +139,12 @@ config POSIX_MQUEUE
 
 	  If unsure, say Y.
 
+config POSIX_MQUEUE_SYSCTL
+	bool
+	depends on POSIX_MQUEUE
+	depends on SYSCTL
+	default y
+
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	help
Index: 2.6.24-rc3-mm2/ipc/Makefile
===================================================================
--- 2.6.24-rc3-mm2.orig/ipc/Makefile
+++ 2.6.24-rc3-mm2/ipc/Makefile
@@ -9,4 +9,5 @@ obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
 obj-$(CONFIG_IPC_NS) += namespace.o
 obj-$(CONFIG_MQ_NS) += mq_namespace.o
+obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
 
Index: 2.6.24-rc3-mm2/ipc/mq_sysctl.c
===================================================================
--- /dev/null
+++ 2.6.24-rc3-mm2/ipc/mq_sysctl.c
@@ -0,0 +1,117 @@
+/*
+ *  Copyright (C) 2007 IBM Corporation
+ *
+ *  Author: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ */
+
+#include <linux/nsproxy.h>
+#include <linux/mq_namespace.h>
+#include <linux/sysctl.h>
+
+
+static void *get_mq(ctl_table *table)
+{
+	char *which = table->data;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
+	which = (which - (char *)&init_mq_ns) + (char *)mq_ns;
+	return which;
+}
+
+#ifdef CONFIG_PROC_FS
+static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table mq_table;
+	memcpy(&mq_table, table, sizeof(mq_table));
+	mq_table.data = get_mq(table);
+
+	return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+}
+
+static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table mq_table;
+	memcpy(&mq_table, table, sizeof(mq_table));
+	mq_table.data = get_mq(table);
+
+	return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+					lenp, ppos);
+}
+#else
+static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+#endif
+
+static int msg_max_limit_min = DFLT_MSGMAX;
+static int msg_max_limit_max = HARD_MSGMAX;
+
+static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
+static int msg_maxsize_limit_max = INT_MAX;
+
+static ctl_table mq_sysctls[] = {
+	{
+		.procname	= "queues_max",
+		.data		= &init_mq_ns.queues_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_mq_dointvec,
+	},
+	{
+		.procname	= "msg_max",
+		.data		= &init_mq_ns.msg_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_mq_dointvec_minmax,
+		.extra1		= &msg_max_limit_min,
+		.extra2		= &msg_max_limit_max,
+	},
+	{
+		.procname	= "msgsize_max",
+		.data		= &init_mq_ns.msgsize_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_mq_dointvec_minmax,
+		.extra1		= &msg_maxsize_limit_min,
+		.extra2		= &msg_maxsize_limit_max,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_dir[] = {
+	{
+		.procname	= "mqueue",
+		.mode		= 0555,
+		.child		= mq_sysctls,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_root[] = {
+	{
+		.ctl_name	= CTL_FS,
+		.procname	= "fs",
+		.mode		= 0555,
+		.child		= mq_sysctl_dir,
+	},
+	{ .ctl_name = 0 }
+};
+
+struct ctl_table_header *mq_register_sysctl_table(void)
+{
+	return register_sysctl_table(mq_sysctl_root);
+}
Index: 2.6.24-rc3-mm2/include/linux/mq_namespace.h
===================================================================
--- 2.6.24-rc3-mm2.orig/include/linux/mq_namespace.h
+++ 2.6.24-rc3-mm2/include/linux/mq_namespace.h
@@ -70,4 +70,14 @@ static inline void put_mq_ns(struct mq_n
 
 #endif /* CONFIG_POSIX_MQUEUE */
 
+#ifdef CONFIG_POSIX_MQUEUE_SYSCTL
+struct ctl_table_header;
+extern struct ctl_table_header *mq_register_sysctl_table(void);
+#else
+static inline struct ctl_table_header *mq_register_sysctl_table(void)
+{
+	return NULL;
+}
+#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
+
 #endif /* _LINUX_MQ_H */

-- 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
       [not found] <20071128163728.177495768@fr.ibm.com>
@ 2007-11-28 17:28     ` Pavel Emelyanov
  2007-11-28 16:37 ` [patch -mm 2/4] mqueue namespace : add unshare support Cedric Le Goater
                       ` (5 subsequent siblings)
  6 siblings, 0 replies; 25+ messages in thread
From: Pavel Emelyanov @ 2007-11-28 17:28 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Linux Containers, Andrew Morton, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelianov

Cedric Le Goater wrote:
> Hello ! 
> 
> Here's a small patchset introducing a new namespace for POSIX
> message queues. 
> 
> Nothing really complex a part from the mqueue filesystem which 
> needed some special care

Hm... Why did you decide to make it separate from the
IPC namespace?

> Thanks for reviewing !
> 
> C.
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
@ 2007-11-28 17:28     ` Pavel Emelyanov
  0 siblings, 0 replies; 25+ messages in thread
From: Pavel Emelyanov @ 2007-11-28 17:28 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Pavel Emelianov, Eric W. Biederman

Cedric Le Goater wrote:
> Hello ! 
> 
> Here's a small patchset introducing a new namespace for POSIX
> message queues. 
> 
> Nothing really complex a part from the mqueue filesystem which 
> needed some special care

Hm... Why did you decide to make it separate from the
IPC namespace?

> Thanks for reviewing !
> 
> C.
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
       [not found] ` <20071128164349.196734045@fr.ibm.com>
@ 2007-11-28 17:32   ` Pavel Emelyanov
  2007-11-29 10:28     ` Cedric Le Goater
       [not found]     ` <474DA61B.5030301-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
  2007-11-29 15:03   ` Eric W. Biederman
  1 sibling, 2 replies; 25+ messages in thread
From: Pavel Emelyanov @ 2007-11-28 17:32 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Eric W. Biederman

Cedric Le Goater wrote:
> From: Cedric Le Goater <clg@fr.ibm.com>
> 
> This patch includes the mqueue namespace in the nsproxy object. It  
> also adds the support of unshare() and clone() with a new clone flag 
> CLONE_NEWMQ (1 bit left in the clone flags !)
> 
> CLONE_NEWMQ is required to be cloned or unshared along with CLONE_NEWNS.
> This is to make sure that no user mounts of the internal mqueue fs
> are left behind when the last task exits. 
> 
> It's totally harmless for the moment because the current code still 
> uses the default mqueue namespace object 'init_mq_ns' 
> 
> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
> ---
>  include/linux/init_task.h    |    2 ++
>  include/linux/mq_namespace.h |    4 ++++
>  include/linux/nsproxy.h      |    2 ++
>  include/linux/sched.h        |    1 +
>  ipc/mq_namespace.c           |   36 +++++++++++++++++++++++++++++++++++-
>  kernel/fork.c                |   15 ++++++++++++++-
>  kernel/nsproxy.c             |   16 ++++++++++++++--
>  7 files changed, 72 insertions(+), 4 deletions(-)
> 
> Index: 2.6.24-rc3-mm2/include/linux/init_task.h
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/include/linux/init_task.h
> +++ 2.6.24-rc3-mm2/include/linux/init_task.h
> @@ -10,6 +10,7 @@
>  #include <linux/pid_namespace.h>
>  #include <linux/user_namespace.h>
>  #include <net/net_namespace.h>
> +#include <linux/mq_namespace.h>
>  
>  #define INIT_FDTABLE \
>  {							\
> @@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
>  	INIT_NET_NS(net_ns)                                             \
>  	INIT_IPC_NS(ipc_ns)						\
>  	.user_ns	= &init_user_ns,				\
> +	INIT_MQ_NS(mq_ns)						\
>  }
>  
>  #define INIT_SIGHAND(sighand) {						\
> Index: 2.6.24-rc3-mm2/include/linux/sched.h
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
> +++ 2.6.24-rc3-mm2/include/linux/sched.h
> @@ -27,6 +27,7 @@
>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */

That's it :) We've run out of clone flags on 32-bit platforms :(

>  /*
>   * Scheduling policies
> Index: 2.6.24-rc3-mm2/kernel/nsproxy.c
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/kernel/nsproxy.c
> +++ 2.6.24-rc3-mm2/kernel/nsproxy.c
> @@ -93,8 +93,17 @@ static struct nsproxy *create_new_namesp
>  		goto out_net;
>  	}
>  
> +	new_nsp->mq_ns = copy_mq_ns(flags, tsk->nsproxy->mq_ns);
> +	if (IS_ERR(new_nsp->mq_ns)) {
> +		err = PTR_ERR(new_nsp->mq_ns);
> +		goto out_mq;
> +	}
> +
>  	return new_nsp;
>  
> +out_mq:
> +	if (new_nsp->user_ns)
> +		put_user_ns(new_nsp->user_ns);
>  out_net:
>  	if (new_nsp->user_ns)
>  		put_user_ns(new_nsp->user_ns);
> @@ -131,7 +140,8 @@ int copy_namespaces(unsigned long flags,
>  	get_nsproxy(old_ns);
>  
>  	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> -				CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
> +				CLONE_NEWUSER | CLONE_NEWPID |
> +				CLONE_NEWNET | CLONE_NEWMQ)))
>  		return 0;
>  
>  	if (!capable(CAP_SYS_ADMIN)) {
> @@ -170,6 +180,8 @@ void free_nsproxy(struct nsproxy *ns)
>  		put_pid_ns(ns->pid_ns);
>  	if (ns->user_ns)
>  		put_user_ns(ns->user_ns);
> +	if (ns->mq_ns)
> +		put_mq_ns(ns->mq_ns);
>  	put_net(ns->net_ns);
>  	kmem_cache_free(nsproxy_cachep, ns);
>  }
> @@ -184,7 +196,7 @@ int unshare_nsproxy_namespaces(unsigned 
>  	int err = 0;
>  
>  	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> -			       CLONE_NEWUSER | CLONE_NEWNET)))
> +			       CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWMQ)))
>  		return 0;
>  
>  	if (!capable(CAP_SYS_ADMIN))
> Index: 2.6.24-rc3-mm2/kernel/fork.c
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/kernel/fork.c
> +++ 2.6.24-rc3-mm2/kernel/fork.c
> @@ -1004,6 +1004,13 @@ static struct task_struct *copy_process(
>  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
>  		return ERR_PTR(-EINVAL);
>  
> +	/*
> +	 * mount namespace cannot be unshared when the mqueue
> +	 * namespace is not

vice versa - mqueue namespace cannot be unshared when the mount one is not ;)

> +	 */
> +	if ((clone_flags & CLONE_NEWMQ) && !(clone_flags & CLONE_NEWNS))
> +		return ERR_PTR(-EINVAL);
> +
>  	retval = security_task_create(clone_flags);
>  	if (retval)
>  		goto fork_out;
> @@ -1570,6 +1577,12 @@ static void check_unshare_flags(unsigned
>  		*flags_ptr |= CLONE_THREAD;
>  
>  	/*
> +	 * If unsharing mqueue namespace, must also unshare mnt namespace.
> +	 */
> +	if (*flags_ptr & CLONE_NEWMQ)
> +		*flags_ptr |= CLONE_NEWNS;
> +
> +	/*
>  	 * If unsharing namespace, must also unshare filesystem information.
>  	 */
>  	if (*flags_ptr & CLONE_NEWNS)
> @@ -1687,7 +1700,7 @@ asmlinkage long sys_unshare(unsigned lon
>  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
>  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
>  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
> -				CLONE_NEWNET))
> +				CLONE_NEWNET|CLONE_NEWMQ))
>  		goto bad_unshare_out;
>  
>  	if ((err = unshare_thread(unshare_flags)))
> Index: 2.6.24-rc3-mm2/include/linux/nsproxy.h
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/include/linux/nsproxy.h
> +++ 2.6.24-rc3-mm2/include/linux/nsproxy.h
> @@ -8,6 +8,7 @@ struct mnt_namespace;
>  struct uts_namespace;
>  struct ipc_namespace;
>  struct pid_namespace;
> +struct mq_namespace;
>  
>  /*
>   * A structure to contain pointers to all per-process
> @@ -29,6 +30,7 @@ struct nsproxy {
>  	struct pid_namespace *pid_ns;
>  	struct user_namespace *user_ns;
>  	struct net 	     *net_ns;
> +	struct mq_namespace *mq_ns;
>  };
>  extern struct nsproxy init_nsproxy;
>  
> Index: 2.6.24-rc3-mm2/ipc/mq_namespace.c
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/ipc/mq_namespace.c
> +++ 2.6.24-rc3-mm2/ipc/mq_namespace.c
> @@ -10,14 +10,48 @@
>   */
>  
>  #include <linux/mq_namespace.h>
> +#include <linux/slab.h>
> +#include <linux/sched.h>
> +#include <linux/err.h>
> +
> +static struct mq_namespace *clone_mq_ns(struct mq_namespace *old_ns)
> +{
> +	struct mq_namespace *mq_ns;
> +
> +	mq_ns = kmalloc(sizeof(struct mq_namespace), GFP_KERNEL);
> +	if (!mq_ns)
> +		return ERR_PTR(-ENOMEM);
> +
> +	kref_init(&mq_ns->kref);
> +	mq_ns->queues_count	= 0;
> +	mq_ns->queues_max	= DFLT_QUEUESMAX;
> +	mq_ns->msg_max		= DFLT_MSGMAX;
> +	mq_ns->msgsize_max	= DFLT_MSGSIZEMAX;
> +	mq_ns->mnt		= NULL;
> +	return mq_ns;
> +}
>  
>  struct mq_namespace *copy_mq_ns(unsigned long flags,
>  				struct mq_namespace *old_ns)
>  {
> +	struct mq_namespace *mq_ns;
> +
>  	BUG_ON(!old_ns);
> -	return get_mq_ns(old_ns);
> +	get_mq_ns(old_ns);
> +
> +	if (!(flags & CLONE_NEWMQ))
> +		return old_ns;
> +
> +	mq_ns = clone_mq_ns(old_ns);
> +
> +	put_mq_ns(old_ns);
> +	return mq_ns;
>  }
>  
>  void free_mq_ns(struct kref *kref)
>  {
> +	struct mq_namespace *ns;
> +
> +	ns = container_of(kref, struct mq_namespace, kref);
> +	kfree(ns);
>  }
> Index: 2.6.24-rc3-mm2/include/linux/mq_namespace.h
> ===================================================================
> --- 2.6.24-rc3-mm2.orig/include/linux/mq_namespace.h
> +++ 2.6.24-rc3-mm2/include/linux/mq_namespace.h
> @@ -2,6 +2,7 @@
>  #define _LINUX_MQ_NAMESPACE_H
>  
>  #include <linux/kref.h>
> +#include <linux/err.h>
>  
>  struct vfsmount;
>  
> @@ -57,6 +58,9 @@ static inline struct mq_namespace *get_m
>  static inline struct mq_namespace *copy_mq_ns(unsigned long flags,
>  					struct mq_namespace *old_ns)
>  {
> +	if (flags & CLONE_NEWMQ)
> +		return ERR_PTR(-EINVAL);
> +
>  	return old_ns;
>  }
>  
> 
> -- 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2007-11-28 17:28     ` Pavel Emelyanov
  (?)
@ 2007-11-29  9:52     ` Cedric Le Goater
  -1 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-29  9:52 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Eric W. Biederman

Pavel Emelyanov wrote:
> Cedric Le Goater wrote:
>> Hello ! 
>>
>> Here's a small patchset introducing a new namespace for POSIX
>> message queues. 
>>
>> Nothing really complex a part from the mqueue filesystem which 
>> needed some special care
> 
> Hm... Why did you decided to make it separately from the
> IPC namespace?

Mostly because it has its own configuration option and filesystem
which requires to clone also the mnt namespace.  

but yes they could probably be merged. Let's see what the others 
have to say about it.

C.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-28 17:32   ` [patch -mm 2/4] mqueue namespace : add unshare support Pavel Emelyanov
@ 2007-11-29 10:28     ` Cedric Le Goater
       [not found]     ` <474DA61B.5030301-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
  1 sibling, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-29 10:28 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Eric W. Biederman


>> Index: 2.6.24-rc3-mm2/kernel/fork.c
>> ===================================================================
>> --- 2.6.24-rc3-mm2.orig/kernel/fork.c
>> +++ 2.6.24-rc3-mm2/kernel/fork.c
>> @@ -1004,6 +1004,13 @@ static struct task_struct *copy_process(
>>  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
>>  		return ERR_PTR(-EINVAL);
>>  
>> +	/*
>> +	 * mount namespace cannot be unshared when the mqueue
>> +	 * namespace is not
> 
> vice versa - mqueue namespace cannot be unshared when the mount one is not ;)

arg. yes :) 

Thanks !

C.


Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
---
 kernel/fork.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: 2.6.24-rc3-mm2/kernel/fork.c
===================================================================
--- 2.6.24-rc3-mm2.orig/kernel/fork.c
+++ 2.6.24-rc3-mm2/kernel/fork.c
@@ -1005,7 +1005,7 @@ static struct task_struct *copy_process(
                return ERR_PTR(-EINVAL);
 
        /*
-        * mount namespace cannot be unshared when the mqueue
+        * mqueue namespace cannot be unshared when the mount
         * namespace is not
         */
        if ((clone_flags & CLONE_NEWMQ) && !(clone_flags & CLONE_NEWNS))

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-28 17:32   ` [patch -mm 2/4] mqueue namespace : add unshare support Pavel Emelyanov
@ 2007-11-29 10:28         ` Cedric Le Goater
       [not found]     ` <474DA61B.5030301-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
  1 sibling, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-29 10:28 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Linux Containers, Andrew Morton, Linux Kernel Mailing List,
	Eric W. Biederman

>> Index: 2.6.24-rc3-mm2/include/linux/sched.h
>> ===================================================================
>> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
>> +++ 2.6.24-rc3-mm2/include/linux/sched.h
>> @@ -27,6 +27,7 @@
>>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
>>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
>>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
>> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
> 
> That's it :) We've run out of clone flags on 32-bit platforms :(

yes. 

I have been giving some thought to a clone2() to extend the flags but
andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
2.6.26. So we might have some more time in front of us.

C.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
@ 2007-11-29 10:28         ` Cedric Le Goater
  0 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-29 10:28 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Eric W. Biederman

>> Index: 2.6.24-rc3-mm2/include/linux/sched.h
>> ===================================================================
>> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
>> +++ 2.6.24-rc3-mm2/include/linux/sched.h
>> @@ -27,6 +27,7 @@
>>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
>>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
>>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
>> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
> 
> That's it :) We've run out of clone flags on 32-bit platforms :(

yes. 

I have been giving some thought to a clone2() to extend the flags but
andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
2.6.26. So we might have some more time in front of us.

C.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-29 10:28         ` Cedric Le Goater
  (?)
@ 2007-11-29 10:52         ` Andrew Morton
  2007-11-29 13:57           ` Serge E. Hallyn
  -1 siblings, 1 reply; 25+ messages in thread
From: Andrew Morton @ 2007-11-29 10:52 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Pavel Emelyanov, Linux Containers, Linux Kernel Mailing List,
	Eric W. Biederman

On Thu, 29 Nov 2007 11:28:28 +0100 Cedric Le Goater <clg@fr.ibm.com> wrote:

> >> Index: 2.6.24-rc3-mm2/include/linux/sched.h
> >> ===================================================================
> >> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
> >> +++ 2.6.24-rc3-mm2/include/linux/sched.h
> >> @@ -27,6 +27,7 @@
> >>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
> >>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
> >>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
> >> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
> > 
> > That's it :) We've run out of clone flags on 32-bit platforms :(
> 
> yes. 
> 
> I have been giving some thoughts to a clone2() to extend the flags

There appears to be little alternative.

> but
> andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
> 2.6.26. Some we might have some more time in front of us.

CLONE_DETACHED proved to be in use.  There are no reports of anyone using
CLONE_STOPPED though.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-29 10:52         ` Andrew Morton
@ 2007-11-29 13:57           ` Serge E. Hallyn
  0 siblings, 0 replies; 25+ messages in thread
From: Serge E. Hallyn @ 2007-11-29 13:57 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Cedric Le Goater, Linux Containers, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelyanov

Quoting Andrew Morton (akpm@linux-foundation.org):
> On Thu, 29 Nov 2007 11:28:28 +0100 Cedric Le Goater <clg@fr.ibm.com> wrote:
> 
> > >> Index: 2.6.24-rc3-mm2/include/linux/sched.h
> > >> ===================================================================
> > >> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
> > >> +++ 2.6.24-rc3-mm2/include/linux/sched.h
> > >> @@ -27,6 +27,7 @@
> > >>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
> > >>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
> > >>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
> > >> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
> > > 
> > > That's it :) We've run out of clone flags on 32-bit platforms :(
> > 
> > yes. 
> > 
> > I have been giving some thoughts to a clone2() to extend the flags
> 
> There appears to be little alternative.

Just thinking aloud, but

given the concerns with the safety and sanity of unsharing only partial
namespaces, and before much userspace is depending on any of
	CLONE_NEWUTS,CLONE_NEWIPC,CLONE_NEWUSER,CLONE_NEWNET,CLONE_NEWMQUEUE

maybe we should have traditional clone only support CLONE_NEWNS (since
it's the most useful on its own) and CLONE_NEWCONTAINER, where
CLONE_NEWCONTAINER always unshares all the namespaces we know about.

Then clone2 can allow more finegrained choice of namespaces.  It takes
the exact same clone_flags as clone(), but instead of parent_tidptr
and child_tidptr args it has a ns_unshare flag which specifies which
namespaces to unshare.

-serge

> > but
> > andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
> > 2.6.26. Some we might have some more time in front of us.
> 
> CLONE_DETACHED proved to be in use.  There are no reports of anyone using
> CLONE_STOPPED though.
> 
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
       [not found] ` <20071128164349.196734045@fr.ibm.com>
  2007-11-28 17:32   ` [patch -mm 2/4] mqueue namespace : add unshare support Pavel Emelyanov
@ 2007-11-29 15:03   ` Eric W. Biederman
  1 sibling, 0 replies; 25+ messages in thread
From: Eric W. Biederman @ 2007-11-29 15:03 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Pavel Emelianov

Cedric Le Goater <clg@fr.ibm.com> writes:

> From: Cedric Le Goater <clg@fr.ibm.com>
>
> This patch includes the mqueue namespace in the nsproxy object. It  
> also adds the support of unshare() and clone() with a new clone flag 
> CLONE_NEWMQ (1 bit left in the clone flags !)
>
> CLONE_NEWMQ is required to be cloned or unshared along with CLONE_NEWNS.
> This is to make sure that no user mounts of the internal mqueue fs
> are left behind when the last task exits. 

Sounds reasonable.  It would be cool if we didn't have to do this.
(Why isn't the mqueue fs MS_NOUSER?)  Ah well.

I'm not certain about requiring CLONE_NEWNS but it looks to be
ugly if we try and work it the other way.

> It's totally harmless for the moment because the current code still 
> uses the default mqueue namespace object 'init_mq_ns' 

I don't believe the harmless part.  Creating new objects should really
come after we have the code to really make them work.  In some sense
the next patch which makes this work causes ABI breakage.  Closely
packed together it doesn't matter but...

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-29 10:28         ` Cedric Le Goater
@ 2007-11-29 20:14             ` Oren Laadan
  -1 siblings, 0 replies; 25+ messages in thread
From: Oren Laadan @ 2007-11-29 20:14 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Linux Containers, Andrew Morton, Eric W. Biederman,
	Linux Kernel Mailing List, Pavel Emelyanov



Cedric Le Goater wrote:
>>> Index: 2.6.24-rc3-mm2/include/linux/sched.h
>>> ===================================================================
>>> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
>>> +++ 2.6.24-rc3-mm2/include/linux/sched.h
>>> @@ -27,6 +27,7 @@
>>>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
>>>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
>>>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
>>> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
>> That's it :) We've run out of clone flags on 32-bit platforms :(
> 
> yes. 
> 
> I have been giving some thoughts to a clone2() to extend the flags but
> andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
> 2.6.26. Some we might have some more time in front of us.

Two comments:

1) Does it ever make any sense to clone the IPC namespace *without* doing
so also for the MQ namespace or vice versa ?  Unless there is a good
reason for doing so, a single CLONE_IPCMQ flag would suffice.

2) Before coming up with a new clone2() or other solution, what about the
proposed (and debated) sys_indirect() -- if it gets merged it can provide
the solution.

Oren.

> 
> C.
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
@ 2007-11-29 20:14             ` Oren Laadan
  0 siblings, 0 replies; 25+ messages in thread
From: Oren Laadan @ 2007-11-29 20:14 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Pavel Emelyanov, Linux Containers, Andrew Morton,
	Linux Kernel Mailing List, Eric W. Biederman



Cedric Le Goater wrote:
>>> Index: 2.6.24-rc3-mm2/include/linux/sched.h
>>> ===================================================================
>>> --- 2.6.24-rc3-mm2.orig/include/linux/sched.h
>>> +++ 2.6.24-rc3-mm2/include/linux/sched.h
>>> @@ -27,6 +27,7 @@
>>>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
>>>  #define CLONE_NEWPID		0x20000000	/* New pid namespace */
>>>  #define CLONE_NEWNET		0x40000000	/* New network namespace */
>>> +#define CLONE_NEWMQ		0x80000000	/* New posix mqueue namespace */
>> That's it :) We've run out of clone flags on 32-bit platforms :(
> 
> yes. 
> 
> I have been giving some thoughts to a clone2() to extend the flags but
> andrew is preparing to recycle CLONE_DETACHED and CLONE_STOPPED for
> 2.6.26. Some we might have some more time in front of us.

Two comments:

1) Does it ever make any sense to clone the IPC namespace *without* doing
so also for the MQ namespace or vice versa ?  Unless there is a good
reason for doing so, a single CLONE_IPCMQ flag would suffice.

2) Before coming up with a new clone2() or other solution, what about the
proposed (and debated) sys_indirect() -- if it gets merged it can provide
the solution.

Oren.

> 
> C.
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 2/4] mqueue namespace : add unshare support
  2007-11-29 20:14             ` Oren Laadan
  (?)
@ 2007-11-29 21:49             ` Eric W. Biederman
  -1 siblings, 0 replies; 25+ messages in thread
From: Eric W. Biederman @ 2007-11-29 21:49 UTC (permalink / raw)
  To: Oren Laadan
  Cc: Cedric Le Goater, Pavel Emelyanov, Linux Containers,
	Andrew Morton, Linux Kernel Mailing List

Oren Laadan <orenl@cs.columbia.edu> writes:

> Two comments:
>
> 1) Does it ever make any sense to clone the IPC namespace *without* doing
> so also for the MQ namespace or vice versa ?  Unless there is a good
> reason for doing so, a single CLONE_IPCMQ flag would suffice.

SYSVIPC and POSIX IPC are different, and I don't see any argument for why
they would be in the same namespace.  Keeping them separate is also better
for maintenance and testing, and we have already shipped a stable version
of the IPC namespace, so we would be breaking the ABI if we were to add
message queues into it now.

Frankly I find it a shame that we had to do more than implement multiple
mounts of the mq filesystem to make this work.

In general when we use the filesystem namespace for new global objects
visible to user space is a design bug.

> 2) Before coming up with a new clone2() or other solution, what about the
> proposed (and debated) sys_indrect() -- if it gets merged it can provide
> the solution.

Bleh.  We have to have the flag parameters and modify all of the code anyway
so I'm not quite certain that sys_indirect makes sense.

Certainly in this case if we have namespaces that can not be combined with
CLONE_THREAD we could double assign a field really easily.  Trouble is that
is just a bit icky.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
       [not found] <20071128163728.177495768@fr.ibm.com>
                   ` (5 preceding siblings ...)
       [not found] ` <20071128164349.196734045@fr.ibm.com>
@ 2008-06-20  3:00 ` Eric W. Biederman
  2008-06-20  3:39     ` Eric W. Biederman
  2008-06-20 14:50   ` Serge E. Hallyn
  6 siblings, 2 replies; 25+ messages in thread
From: Eric W. Biederman @ 2008-06-20  3:00 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Pavel Emelianov

Cedric Le Goater <clg@fr.ibm.com> writes:

> Hello ! 
>
> Here's a small patchset introducing a new namespace for POSIX
> message queues. 
>
> Nothing really complex a part from the mqueue filesystem which 
> needed some special care

This looks stalled.  I have a brainstorm that might take a totally
different perspective on things.

The only reason we don't just allow multiple mounts of mqueuefs to
solve this problem is because there is a kernel syscall on the path.

If we just hard coded a mount point into the kernel and required user
space to always mount mqueuefs there the problem would be solved.

Hard coding a mount point unfortunately violates the unix rule
of separating mechanism and policy.

One way to fix that is to add a hidden directory to the mnt namespace,
where magic in-kernel filesystems can be mounted, visible only
with a magic openat flag.  Then:

fd = openat(AT_FDKERN, ".", O_DIRECTORY);
fchdir(fd);
umount2("./mqueue", MNT_DETACH);
mount("none", "./mqueue", "mqueue", 0, NULL);

Would unshare the mqueue namespace.

Implemented for plan9 this would solve a problem of how do you get
access to all of its special filesystems.  As only bind mounts
and remote filesystem mounts are available.  For linux thinking about
it might shake the conversation up a bit.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2008-06-20  3:00 ` [patch -mm 0/4] mqueue namespace Eric W. Biederman
@ 2008-06-20  3:39     ` Eric W. Biederman
  2008-06-20 14:50   ` Serge E. Hallyn
  1 sibling, 0 replies; 25+ messages in thread
From: Eric W. Biederman @ 2008-06-20  3:39 UTC (permalink / raw)
  To: Cedric Le Goater, Andrew Morton, Linux Containers,
	Linux Kernel Mailing List

ebiederm@xmission.com (Eric W. Biederman) writes:

> One way to fix that is to add a hidden directory to the mnt namespace.
> Where magic in kernel filesystems can be mounted.  Only visible
> with a magic openat flag.  Then:
>
> fd = openat(AT_FDKERN, ".", O_DIRECTORY)
> fchdir(fd);
> umount("./mqueue", MNT_DETACH);
> mount(("none", "./mqueue", "mqueue", 0, NULL);
>
> Would unshare the mqueue namespace.
>
> Implemented for plan9 this would solve a problem of how do you get
> access to all of it's special filesystems.  As only bind mounts
> and remote filesystem mounts are available.  For linux thinking about
> it might shake the conversation up a bit.

Thinking about this some more.  What is especially attractive if we do
all namespaces this way is that it solves two lurking problems.
1) How do you keep a namespace around without a process in it.
2) How do you enter a container.

If we could land the namespaces in the filesystem we could easily
persist them past the point where a process is present in one if we so
choose.

Entering a container would be a matter of replacing your current
namespace mounts with namespace mounts taken from the filesystem.

I expect performance would degrade in practice, but it is tempting
to implement it and run a benchmark and see if we can measure anything.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
@ 2008-06-20  3:39     ` Eric W. Biederman
  0 siblings, 0 replies; 25+ messages in thread
From: Eric W. Biederman @ 2008-06-20  3:39 UTC (permalink / raw)
  To: Cedric Le Goater, Andrew Morton, Linux Containers,
	Linux Kernel Mailing List, Pavel Emelianov, Serge Hallyn

ebiederm@xmission.com (Eric W. Biederman) writes:

> One way to fix that is to add a hidden directory to the mnt namespace.
> Where magic in kernel filesystems can be mounted.  Only visible
> with a magic openat flag.  Then:
>
> fd = openat(AT_FDKERN, ".", O_DIRECTORY)
> fchdir(fd);
> umount("./mqueue", MNT_DETACH);
> mount(("none", "./mqueue", "mqueue", 0, NULL);
>
> Would unshare the mqueue namespace.
>
> Implemented for plan9 this would solve a problem of how do you get
> access to all of it's special filesystems.  As only bind mounts
> and remote filesystem mounts are available.  For linux thinking about
> it might shake the conversation up a bit.

Thinking about this some more.  What is especially attractive if we do
all namespaces this way is that it solves two lurking problems.
1) How do you keep a namespace around without a process in it.
2) How do you enter a container.

If we could land the namespaces in the filesystem we could easily
persist them past the point where a process is present in one if we so
choose.

Entering a container would be a matter of replacing your current
namespace mounts with namespace mounts taken from the filesystem.

I expect performance would degrade in practice, but it is tempting
to implement it and run a benchmark and see if we can measure anything.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2008-06-20  3:00 ` [patch -mm 0/4] mqueue namespace Eric W. Biederman
  2008-06-20  3:39     ` Eric W. Biederman
@ 2008-06-20 14:50   ` Serge E. Hallyn
  2008-06-20 19:11     ` Eric W. Biederman
  1 sibling, 1 reply; 25+ messages in thread
From: Serge E. Hallyn @ 2008-06-20 14:50 UTC (permalink / raw)
  To: Eric W. Biederman, Dave Hansen
  Cc: Cedric Le Goater, Linux Containers, Andrew Morton,
	Linux Kernel Mailing List, Pavel Emelianov

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Cedric Le Goater <clg@fr.ibm.com> writes:
> 
> > Hello ! 
> >
> > Here's a small patchset introducing a new namespace for POSIX
> > message queues. 
> >
> > Nothing really complex a part from the mqueue filesystem which 
> > needed some special care
> 
> This looks stalled.

It actually isn't really - Cedric had resent it a few weeks ago but had
troubles with the mail server so it never hit the lists.  I think Dave
made a few more changes from there and was getting ready to resend
again.  Dave?

> I have a brainstorm that might takes a totally
> different perspective on things.
> 
> The only reason we don't just allow multiple mounts of mqueuefs to
> solve this problem is because there is a kernel syscall on the path.
> 
> If we just hard coded a mount point into the kernel and required user
> space to always mount mqueuefs there the problem would be solved.
> 
> hard coding a mount point is unfortunately violates the unix rule
> of separating mechanism and policy.
> 
> One way to fix that is to add a hidden directory to the mnt namespace.
> Where magic in kernel filesystems can be mounted.  Only visible
> with a magic openat flag.  Then:
> 
> fd = openat(AT_FDKERN, ".", O_DIRECTORY)
> fchdir(fd);
> umount("./mqueue", MNT_DETACH);
> mount(("none", "./mqueue", "mqueue", 0, NULL);
> 
> Would unshare the mqueue namespace.
> 
> Implemented for plan9 this would solve a problem of how do you get
> access to all of it's special filesystems.  As only bind mounts
> and remote filesystem mounts are available.  For linux thinking about
> it might shake the conversation up a bit.

It is unfortunate that two actions are needed to properly complete the
unshare, and we had definitely talked about just using the mount before.
I forget why we decided it wasn't practical, so maybe what you describe
solves it...

But at least the current patch reuses CLONE_NEWIPC for posix ipc, which
also seems to make sense.

-serge

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2008-06-20  3:39     ` Eric W. Biederman
  (?)
@ 2008-06-20 14:53     ` Serge E. Hallyn
  -1 siblings, 0 replies; 25+ messages in thread
From: Serge E. Hallyn @ 2008-06-20 14:53 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Cedric Le Goater, Andrew Morton, Linux Containers,
	Linux Kernel Mailing List, Pavel Emelianov, Serge Hallyn

Quoting Eric W. Biederman (ebiederm@xmission.com):
> ebiederm@xmission.com (Eric W. Biederman) writes:
> 
> > One way to fix that is to add a hidden directory to the mnt namespace.
> > Where magic in kernel filesystems can be mounted.  Only visible
> > with a magic openat flag.  Then:
> >
> > fd = openat(AT_FDKERN, ".", O_DIRECTORY)
> > fchdir(fd);
> > umount("./mqueue", MNT_DETACH);
> > mount(("none", "./mqueue", "mqueue", 0, NULL);
> >
> > Would unshare the mqueue namespace.
> >
> > Implemented for plan9 this would solve a problem of how do you get
> > access to all of it's special filesystems.  As only bind mounts
> > and remote filesystem mounts are available.  For linux thinking about
> > it might shake the conversation up a bit.
> 
> Thinking about this some more.  What is especially attractive if we do
> all namespaces this way is that it solves two lurking problems.
> 1) How do you keep a namespace around without a process in it.
> 2) How do you enter a container.
> 
> If we could land the namespaces in the filesystem we could easily
> persist them past the point where a process is present in one if we so
> choose.
> 
> Entering a container would be a matter of replacing your current
> namespaces mounts with namespace mounts take from the filesystem.
> 
> I expect performance would degrade in practice, but it is tempting
> to implement it and run a benchmark and see if we can measure anything.

The device ns could be a mount of an fs with the devices created in it,
while mknod becomes a symlink from that fs.  And once a network
namespace is a filesystem, we can aim for the plan9 NAT solution of
mounting a remote /net onto ours.  Neat.

But bye-bye posix?

-serge

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2008-06-20 14:50   ` Serge E. Hallyn
@ 2008-06-20 19:11     ` Eric W. Biederman
  0 siblings, 0 replies; 25+ messages in thread
From: Eric W. Biederman @ 2008-06-20 19:11 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Dave Hansen, Cedric Le Goater, Linux Containers, Andrew Morton,
	Linux Kernel Mailing List, Pavel Emelianov

"Serge E. Hallyn" <serue@us.ibm.com> writes:

>
> It is unfortunate that two actions are needed to properly complete the
> unshare, and we had definately talked about just using the mount before.
> I forget why we decided it wasn't practical, so maybe what you describe
> solves it...

What is worse, and I don't see a way around it, is that we don't have
any callbacks to check where things are mounted.  So we can't ensure the
proper kind of filesystem is mounted in the right place.

That is, there is too much freedom in the mount APIs to allow for reliable
operation.

> But at least the current patch reuses CLONE_NEWIPC for posix ipc, which
> also seems to make sense.

Sort of.  I'm really annoyed with whoever did the posix mqueue support.
Adding the magic syscall that has to know the internal mount instead of
requiring the thing be mounted somewhere and just rejecting file descriptors
for the wrong sorts of files.

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [patch -mm 0/4] mqueue namespace
  2008-06-20  3:39     ` Eric W. Biederman
  (?)
  (?)
@ 2008-08-29  9:46     ` Cedric Le Goater
  -1 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2008-08-29  9:46 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Linux Containers, Linux Kernel Mailing List,
	Pavel Emelianov, Serge Hallyn

Eric W. Biederman wrote:
> ebiederm@xmission.com (Eric W. Biederman) writes:
> 
>> One way to fix that is to add a hidden directory to the mnt namespace.
>> Where magic in kernel filesystems can be mounted.  Only visible
>> with a magic openat flag.  Then:
>>
>> fd = openat(AT_FDKERN, ".", O_DIRECTORY)
>> fchdir(fd);
>> umount("./mqueue", MNT_DETACH);
>> mount(("none", "./mqueue", "mqueue", 0, NULL);
>>
>> Would unshare the mqueue namespace.
>>
>> Implemented for plan9 this would solve a problem of how do you get
>> access to all of it's special filesystems.  As only bind mounts
>> and remote filesystem mounts are available.  For linux thinking about
>> it might shake the conversation up a bit.
> 
> Thinking about this some more.  What is especially attractive if we do
> all namespaces this way is that it solves two lurking problems.
> 1) How do you keep a namespace around without a process in it.
> 2) How do you enter a container.
> 
> If we could land the namespaces in the filesystem we could easily
> persist them past the point where a process is present in one if we so
> choose.
>
> Entering a container would be a matter of replacing your current
> namespaces mounts with namespace mounts take from the filesystem.
> 
> I expect performance would degrade in practice, but it is tempting
> to implement it and run a benchmark and see if we can measure anything.

http://wiki.openvz.org/Containers/Mini-summit_2008_notes

you seem to have talked about this idea at the summit but the notes
are a bit short on the "entering a container" topic. Have you had time to
work on the POC the notes are talking about ? 

the mqueue namespace (and sysv ipc) is typically one of these namespaces
with valid objects which can have no processes in it. 

C. 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [patch -mm 0/4] mqueue namespace
@ 2007-11-28 16:37 Cedric Le Goater
  0 siblings, 0 replies; 25+ messages in thread
From: Cedric Le Goater @ 2007-11-28 16:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Containers, Eric W. Biederman, Linux Kernel Mailing List,
	Pavel Emelianov

Hello ! 

Here's a small patchset introducing a new namespace for POSIX
message queues. 

Nothing really complex apart from the mqueue filesystem, which
needed some special care.

Thanks for reviewing !

C.

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2008-08-29  9:46 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20071128163728.177495768@fr.ibm.com>
2007-11-28 16:37 ` [patch -mm 1/4] mqueue namespace : add struct mq_namespace Cedric Le Goater
2007-11-28 16:37 ` [patch -mm 2/4] mqueue namespace : add unshare support Cedric Le Goater
2007-11-28 16:37 ` [patch -mm 3/4] mqueue namespace : enable the mqueue namespace Cedric Le Goater
2007-11-28 16:37 ` [patch -mm 4/4] mqueue namespace: adapt sysctl Cedric Le Goater
     [not found] ` <20071128163728.177495768-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-11-28 17:28   ` [patch -mm 0/4] mqueue namespace Pavel Emelyanov
2007-11-28 17:28     ` Pavel Emelyanov
2007-11-29  9:52     ` Cedric Le Goater
     [not found] ` <20071128164349.196734045@fr.ibm.com>
2007-11-28 17:32   ` [patch -mm 2/4] mqueue namespace : add unshare support Pavel Emelyanov
2007-11-29 10:28     ` Cedric Le Goater
     [not found]     ` <474DA61B.5030301-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2007-11-29 10:28       ` Cedric Le Goater
2007-11-29 10:28         ` Cedric Le Goater
2007-11-29 10:52         ` Andrew Morton
2007-11-29 13:57           ` Serge E. Hallyn
     [not found]         ` <474E944C.4020809-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-11-29 20:14           ` Oren Laadan
2007-11-29 20:14             ` Oren Laadan
2007-11-29 21:49             ` Eric W. Biederman
2007-11-29 15:03   ` Eric W. Biederman
2008-06-20  3:00 ` [patch -mm 0/4] mqueue namespace Eric W. Biederman
2008-06-20  3:39   ` Eric W. Biederman
2008-06-20  3:39     ` Eric W. Biederman
2008-06-20 14:53     ` Serge E. Hallyn
2008-08-29  9:46     ` Cedric Le Goater
2008-06-20 14:50   ` Serge E. Hallyn
2008-06-20 19:11     ` Eric W. Biederman
2007-11-28 16:37 Cedric Le Goater

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.