All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files
@ 2012-05-11 12:25 Pavel Emelyanov
  2012-05-11 12:25 ` [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces Pavel Emelyanov
  2012-05-11 17:07 ` [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Eric W. Biederman
  0 siblings, 2 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2012-05-11 12:25 UTC (permalink / raw)
  To: Eric W. Biederman, Daniel Lezcano, Linux Kernel Mailing List,
	Serge E. Hallyn
  Cc: Andrew Morton

Some time ago we tried to expose kernel object IDs to the user space to
let it possible to detect shared mm, fs, etc. The namespaces' IDs were
included in this set and Eric proposed, that we'd better expose the ID
in the stat's st_ino field.

That's an implementation of this proposal. The proc_ns_operations is
equipped with the get_ns_id() callback, which should return some unique
ID of a namespace provided. This value is then mixed with the namespace's
type number to make the IDs truly unique.

For IPC and UTS namespaces these IDs are generated with sequentially
incremented 64-bit atomic number. For net namespace the ID is the ifindex
of the namespace's loopback device.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 fs/proc/namespaces.c          |   49 +++++++++++++++++++++++++++++++++++++++++
 include/linux/ipc_namespace.h |    1 +
 include/linux/proc_fs.h       |    1 +
 include/linux/utsname.h       |    1 +
 ipc/namespace.c               |   10 ++++++++
 kernel/utsname.c              |   11 +++++++++
 net/core/net_namespace.c      |    7 ++++++
 7 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 0d9e23a..b6c7560 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,6 +30,54 @@ static const struct file_operations ns_file_operations = {
 	.llseek		= no_llseek,
 };
 
+/*
+ * Namespaces IDs are generated by sequential increment of a
+ * static variable. Then the type value is mixed with it to make
+ * them unique. Thus, this salt value should never be overwritten
+ * by the ID. The 60 bits for constantly incrementing ID will
+ * overflow in 4k years, so it's big enough.
+ */
+
+#define NS_ID_SHIFT	(4)
+#define NS_BIT_MIN	(16)	/* to fit CLONE_NEWNS */
+
+static inline u64 ns_id_gen(u64 id, unsigned type)
+{
+	type = fls(type) - NS_BIT_MIN;
+	BUG_ON(type == 0 || (type >= (1 << NS_ID_SHIFT)));
+	return (id << NS_ID_SHIFT) | type;
+}
+
+static int proc_ns_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		struct kstat *stat)
+{
+	int ret;
+
+	ret = pid_getattr(mnt, dentry, stat);
+	if (!ret) {
+		u64 ns_id = 0;
+		struct proc_inode *ei;
+		const struct proc_ns_operations *ops;
+
+		ei = PROC_I(dentry->d_inode);
+		ops = ei->ns_ops;
+		if (!ops || !ops->get_id)
+			printk("No OPS/ID for %p, %x, %s\n", ops, ops ? ops->type : -1,
+					dentry->d_name.name);
+		else {
+			ns_id = ops->get_id(ei->ns);
+			stat->ino = ns_id_gen(ns_id, ops->type);
+		}
+	}
+
+	return ret;
+}
+
+static const struct inode_operations ns_inode_operations = {
+	.getattr	= proc_ns_getattr,
+	.setattr	= proc_setattr,
+};
+
 static struct dentry *proc_ns_instantiate(struct inode *dir,
 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
@@ -49,6 +97,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
 
 	ei = PROC_I(inode);
 	inode->i_mode = S_IFREG|S_IRUSR;
+	inode->i_op = &ns_inode_operations;
 	inode->i_fop  = &ns_file_operations;
 	ei->ns_ops    = ns_ops;
 	ei->ns	      = ns;
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 8a297a5..e8c2dab 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -65,6 +65,7 @@ struct ipc_namespace {
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
+	u64		id;
 };
 
 extern struct ipc_namespace init_ipc_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 85c5073..e5ee83a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -247,6 +247,7 @@ struct proc_ns_operations {
 	void *(*get)(struct task_struct *task);
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
+	u64 (*get_id)(void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index c714ed7..5f7cdbb 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -52,6 +52,7 @@ struct uts_namespace {
 	struct kref kref;
 	struct new_utsname name;
 	struct user_namespace *user_ns;
+	u64 id;
 };
 extern struct uts_namespace init_uts_ns;
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index ce0a647..e837bd0 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,6 +16,8 @@
 
 #include "util.h"
 
+static atomic_long_t ipcns_id = ATOMIC_LONG_INIT(0);
+
 static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
 					   struct ipc_namespace *old_ns)
 {
@@ -47,6 +49,7 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
 	register_ipcns_notifier(ns);
 
 	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
+	ns->id = atomic_long_inc_return(&ipcns_id);
 
 	return ns;
 }
@@ -170,10 +173,17 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static u64 ipcns_get_id(void *_ns)
+{
+	struct ipc_namespace *ns = _ns;
+	return ns->id;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.get_id		= ipcns_get_id,
 };
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 405caf9..cba6ad3 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -17,6 +17,8 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_fs.h>
 
+static atomic_long_t utsns_id = ATOMIC_LONG_INIT(0);
+
 static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
@@ -45,6 +47,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
 	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
 	up_read(&uts_sem);
+	ns->id = atomic_long_inc_return(&utsns_id);
+
 	return ns;
 }
 
@@ -110,11 +114,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static u64 utsns_get_id(void *_ns)
+{
+	struct uts_namespace *ns = _ns;
+	return ns->id;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.get_id		= utsns_get_id,
 };
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 0e950fd..6611433 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -627,11 +627,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static u64 netns_get_id(void *_ns)
+{
+	struct net *ns = _ns;
+	return ns->loopback_dev->ifindex;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.get_id		= netns_get_id,
 };
 #endif
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces
  2012-05-11 12:25 [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Pavel Emelyanov
@ 2012-05-11 12:25 ` Pavel Emelyanov
  2012-05-11 17:05   ` Eric W. Biederman
       [not found]   ` <87mx5e5tho.fsf_-_@xmission.com>
  2012-05-11 17:07 ` [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Eric W. Biederman
  1 sibling, 2 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2012-05-11 12:25 UTC (permalink / raw)
  To: Eric W. Biederman, Daniel Lezcano, Linux Kernel Mailing List,
	Serge E. Hallyn
  Cc: Andrew Morton

Currently LXC by default creates a container in a new mount
namespace. Thus in order to explore it we have to

a) find out, that a new mount namespace is in use
b) enter this other namespace

This patch solves both -- allows us to distinguish one mount
namespace from another by comparing its inode numbers and lets
us enter a mount namespace with the setns system call.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 fs/namespace.c          |   45 +++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/namespaces.c    |    1 +
 include/linux/proc_fs.h |    2 ++
 3 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index e608199..9467904 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -2633,3 +2634,47 @@ bool our_mnt(struct vfsmount *mnt)
 {
 	return check_mnt(real_mount(mnt));
 }
+
+static void *mntns_get(struct task_struct *task)
+{
+	struct mnt_namespace *mn = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy) {
+		mn = nsproxy->mnt_ns;
+		get_mnt_ns(mn);
+	}
+	rcu_read_unlock();
+
+	return mn;
+}
+
+static void mntns_put(void *ns)
+{
+	put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+	put_mnt_ns(nsproxy->mnt_ns);
+	get_mnt_ns(ns);
+	nsproxy->mnt_ns = ns;
+	return 0;
+}
+
+static u64 mntns_get_id(void *_ns)
+{
+	struct mnt_namespace *ns = _ns;
+	return ns->root->mnt_id;
+}
+
+const struct proc_ns_operations mntns_operations = {
+	.name		= "mnt",
+	.type		= CLONE_NEWNS,
+	.get		= mntns_get,
+	.put		= mntns_put,
+	.install	= mntns_install,
+	.get_id		= mntns_get_id,
+};
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b6c7560..e0399dd 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -24,6 +24,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_IPC_NS
 	&ipcns_operations,
 #endif
+	&mntns_operations,
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e5ee83a..f6311b4 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -252,6 +252,8 @@ struct proc_ns_operations {
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations mntns_operations;
+
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
-- 
1.7.6.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces
  2012-05-11 12:25 ` [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces Pavel Emelyanov
@ 2012-05-11 17:05   ` Eric W. Biederman
  2012-05-12 11:42     ` Pavel Emelyanov
       [not found]   ` <87mx5e5tho.fsf_-_@xmission.com>
  1 sibling, 1 reply; 10+ messages in thread
From: Eric W. Biederman @ 2012-05-11 17:05 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Daniel Lezcano, Linux Kernel Mailing List, Serge E. Hallyn,
	Andrew Morton

Pavel Emelyanov <xemul@parallels.com> writes:

> Currently LXC by default creates a container in a new mount
> namespace. Thus in order to explore it we have to
>
> a) find out, that a new mount namespace is in use
> b) enter this other namespace
>
> This patch solves both -- allows us to distinguish one mount
> namespace from another by comparing its inode numbers and lets
> us enter a mount namespace with the setns system call.

There are two significant bugs with your patch.

You do not set fs->root or fs->pwd to values in the new mount namespace,
I don't believe there is anywhere else in the vfs where this is possible
except possible fchdir.

It is easily possible to create a reference counting cycle by bind
mounting the current mount namespace into itself.

Not that I am opposed to the concept I have just been dusting my patch
for this same functionality off.

Eric



> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
> ---
>  fs/namespace.c          |   45 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/proc/namespaces.c    |    1 +
>  include/linux/proc_fs.h |    2 ++
>  3 files changed, 48 insertions(+), 0 deletions(-)
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index e608199..9467904 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -20,6 +20,7 @@
>  #include <linux/fs_struct.h>	/* get_fs_root et.al. */
>  #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
>  #include <linux/uaccess.h>
> +#include <linux/proc_fs.h>
>  #include "pnode.h"
>  #include "internal.h"
>  
> @@ -2633,3 +2634,47 @@ bool our_mnt(struct vfsmount *mnt)
>  {
>  	return check_mnt(real_mount(mnt));
>  }
> +
> +static void *mntns_get(struct task_struct *task)
> +{
> +	struct mnt_namespace *mn = NULL;
> +	struct nsproxy *nsproxy;
> +
> +	rcu_read_lock();
> +	nsproxy = task_nsproxy(task);
> +	if (nsproxy) {
> +		mn = nsproxy->mnt_ns;
> +		get_mnt_ns(mn);
> +	}
> +	rcu_read_unlock();
> +
> +	return mn;
> +}
> +
> +static void mntns_put(void *ns)
> +{
> +	put_mnt_ns(ns);
> +}
> +
> +static int mntns_install(struct nsproxy *nsproxy, void *ns)
> +{
> +	put_mnt_ns(nsproxy->mnt_ns);
> +	get_mnt_ns(ns);
> +	nsproxy->mnt_ns = ns;
> +	return 0;
> +}
> +
> +static u64 mntns_get_id(void *_ns)
> +{
> +	struct mnt_namespace *ns = _ns;
> +	return ns->root->mnt_id;
> +}
> +
> +const struct proc_ns_operations mntns_operations = {
> +	.name		= "mnt",
> +	.type		= CLONE_NEWNS,
> +	.get		= mntns_get,
> +	.put		= mntns_put,
> +	.install	= mntns_install,
> +	.get_id		= mntns_get_id,
> +};
> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
> index b6c7560..e0399dd 100644
> --- a/fs/proc/namespaces.c
> +++ b/fs/proc/namespaces.c
> @@ -24,6 +24,7 @@ static const struct proc_ns_operations *ns_entries[] = {
>  #ifdef CONFIG_IPC_NS
>  	&ipcns_operations,
>  #endif
> +	&mntns_operations,
>  };
>  
>  static const struct file_operations ns_file_operations = {
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index e5ee83a..f6311b4 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -252,6 +252,8 @@ struct proc_ns_operations {
>  extern const struct proc_ns_operations netns_operations;
>  extern const struct proc_ns_operations utsns_operations;
>  extern const struct proc_ns_operations ipcns_operations;
> +extern const struct proc_ns_operations mntns_operations;
> +
>  
>  union proc_op {
>  	int (*proc_get_link)(struct dentry *, struct path *);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files
  2012-05-11 12:25 [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Pavel Emelyanov
  2012-05-11 12:25 ` [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces Pavel Emelyanov
@ 2012-05-11 17:07 ` Eric W. Biederman
  2012-05-12 11:40   ` Pavel Emelyanov
  1 sibling, 1 reply; 10+ messages in thread
From: Eric W. Biederman @ 2012-05-11 17:07 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Daniel Lezcano, Linux Kernel Mailing List, Serge E. Hallyn,
	Andrew Morton

Pavel Emelyanov <xemul@parallels.com> writes:

> Some time ago we tried to expose kernel object IDs to the user space to
> let it possible to detect shared mm, fs, etc. The namespaces' IDs were
> included in this set and Eric proposed, that we'd better expose the ID
> in the stat's st_ino field.

A quick question.  With kcmp you have something that is at least in
principle usable for checkpoint restart.

Are wanting this for checkpoint restart or something else?

Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files
  2012-05-11 17:07 ` [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Eric W. Biederman
@ 2012-05-12 11:40   ` Pavel Emelyanov
  2012-05-26 15:14     ` Eric W. Biederman
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Emelyanov @ 2012-05-12 11:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Daniel Lezcano, Linux Kernel Mailing List, Serge E. Hallyn,
	Andrew Morton

On 05/11/2012 09:07 PM, Eric W. Biederman wrote:
> Pavel Emelyanov <xemul@parallels.com> writes:
> 
>> Some time ago we tried to expose kernel object IDs to the user space to
>> let it possible to detect shared mm, fs, etc. The namespaces' IDs were
>> included in this set and Eric proposed, that we'd better expose the ID
>> in the stat's st_ino field.
> 
> A quick question.  With kcmp you have something that is at least in
> principle usable for checkpoint restart.

Not only in principle. We do use it already ;)

> Are wanting this for checkpoint restart or something else?

For me -- checkpoint restart only. I'm perfectly fine with checking 
namespaces sharing with kcmp syscall, but you proposed to show ns ID
in proc inode.

> Eric
> .
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vfs: Add setns support for the mount namespace
       [not found]   ` <87mx5e5tho.fsf_-_@xmission.com>
@ 2012-05-12 11:41     ` Pavel Emelyanov
  2012-05-18 19:44       ` Serge E. Hallyn
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Emelyanov @ 2012-05-12 11:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Al Viro, Daniel Lezcano, Linux Kernel Mailing List,
	Serge E. Hallyn, Andrew Morton, linux-fsdevel, Miklos Szeredi

On 05/12/2012 12:54 AM, Eric W. Biederman wrote:
> 
> setns support for the mount namespace is a little tricky as an arbitrary
> decision must be made about what to set fs->root and fs->pwd to, as
> there is no expectation of a relationship between the two mount
> namespaces.  Therefore I arbitrarily find the root mount point, and
> follow every mount on top of it to find the top of the mount stack.
> Then I set fs->root and fs->pwd to that location.  The topmost root of
> the mount stack seems like a reasonable place to be.
> 
> Bind mount support for the mount namespace inodes has the possibility of
> creating circular dependencies between mount namespaces.  Circular
> dependencies can result in loops that prevent mount namespaces from
> every being freed.  I avoid creating those circular dependencies by
> adding a sequence number to the mount namespace and require all bind
> mounts be of a younger mount namespace into an older mount namespace.
> 
> Add a helper function proc_ns_inode so it is possible to detect when we
> are attempting to bind mound a namespace inode.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Acked-by: Pavel Emelyanov <xemul@parallels.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces
  2012-05-11 17:05   ` Eric W. Biederman
@ 2012-05-12 11:42     ` Pavel Emelyanov
  0 siblings, 0 replies; 10+ messages in thread
From: Pavel Emelyanov @ 2012-05-12 11:42 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Daniel Lezcano, Linux Kernel Mailing List, Serge E. Hallyn,
	Andrew Morton

On 05/11/2012 09:05 PM, Eric W. Biederman wrote:
> Pavel Emelyanov <xemul@parallels.com> writes:
> 
>> Currently LXC by default creates a container in a new mount
>> namespace. Thus in order to explore it we have to
>>
>> a) find out, that a new mount namespace is in use
>> b) enter this other namespace
>>
>> This patch solves both -- allows us to distinguish one mount
>> namespace from another by comparing its inode numbers and lets
>> us enter a mount namespace with the setns system call.
> 
> There are two significant bugs with your patch.
> 
> You do not set fs->root or fs->pwd to values in the new mount namespace,
> I don't believe there is anywhere else in the vfs where this is possible
> except possible fchdir.
> 
> It is easily possible to create a reference counting cycle by bind
> mounting the current mount namespace into itself.
> 
> Not that I am opposed to the concept I have just been dusting my patch
> for this same functionality off.

Oh, that's just perfect. Let's move your one then. Hopefully it won't
get covered with dust again.

> Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vfs: Add setns support for the mount namespace
  2012-05-12 11:41     ` [PATCH] vfs: Add setns support for the mount namespace Pavel Emelyanov
@ 2012-05-18 19:44       ` Serge E. Hallyn
  2012-05-18 22:47         ` Eric W. Biederman
  0 siblings, 1 reply; 10+ messages in thread
From: Serge E. Hallyn @ 2012-05-18 19:44 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Eric W. Biederman, Al Viro, Daniel Lezcano,
	Linux Kernel Mailing List, Serge E. Hallyn, Andrew Morton,
	linux-fsdevel, Miklos Szeredi

Quoting Pavel Emelyanov (xemul@parallels.com):
> On 05/12/2012 12:54 AM, Eric W. Biederman wrote:
> > 
> > setns support for the mount namespace is a little tricky as an arbitrary
> > decision must be made about what to set fs->root and fs->pwd to, as
> > there is no expectation of a relationship between the two mount
> > namespaces.  Therefore I arbitrarily find the root mount point, and
> > follow every mount on top of it to find the top of the mount stack.
> > Then I set fs->root and fs->pwd to that location.  The topmost root of
> > the mount stack seems like a reasonable place to be.
> > 
> > Bind mount support for the mount namespace inodes has the possibility of
> > creating circular dependencies between mount namespaces.  Circular
> > dependencies can result in loops that prevent mount namespaces from
> > every being freed.  I avoid creating those circular dependencies by
> > adding a sequence number to the mount namespace and require all bind
> > mounts be of a younger mount namespace into an older mount namespace.
> > 
> > Add a helper function proc_ns_inode so it is possible to detect when we
> > are attempting to bind mound a namespace inode.
> > 
> > Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> 
> Acked-by: Pavel Emelyanov <xemul@parallels.com>

These patches haven't made it into linux-next or Linus' tree.  (More
worrisome, is that the several online linux-kernel mail archives I've
checked seem to have most of this thread, but not Eric's original
patch.)  What path were they expected to go in by?

thanks,
-serge

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vfs: Add setns support for the mount namespace
  2012-05-18 19:44       ` Serge E. Hallyn
@ 2012-05-18 22:47         ` Eric W. Biederman
  0 siblings, 0 replies; 10+ messages in thread
From: Eric W. Biederman @ 2012-05-18 22:47 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Pavel Emelyanov, Al Viro, Daniel Lezcano,
	Linux Kernel Mailing List, Andrew Morton, linux-fsdevel,
	Miklos Szeredi

"Serge E. Hallyn" <serge@hallyn.com> writes:

> Quoting Pavel Emelyanov (xemul@parallels.com):
>> On 05/12/2012 12:54 AM, Eric W. Biederman wrote:
>> > 
>> > setns support for the mount namespace is a little tricky as an arbitrary
>> > decision must be made about what to set fs->root and fs->pwd to, as
>> > there is no expectation of a relationship between the two mount
>> > namespaces.  Therefore I arbitrarily find the root mount point, and
>> > follow every mount on top of it to find the top of the mount stack.
>> > Then I set fs->root and fs->pwd to that location.  The topmost root of
>> > the mount stack seems like a reasonable place to be.
>> > 
>> > Bind mount support for the mount namespace inodes has the possibility of
>> > creating circular dependencies between mount namespaces.  Circular
>> > dependencies can result in loops that prevent mount namespaces from
>> > every being freed.  I avoid creating those circular dependencies by
>> > adding a sequence number to the mount namespace and require all bind
>> > mounts be of a younger mount namespace into an older mount namespace.
>> > 
>> > Add a helper function proc_ns_inode so it is possible to detect when we
>> > are attempting to bind mound a namespace inode.
>> > 
>> > Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>> 
>> Acked-by: Pavel Emelyanov <xemul@parallels.com>
>
> These patches haven't made it into linux-next or Linus' tree.  (More
> worrisome, is that the several online linux-kernel mail archives I've
> checked seem to have most of this thread, but not Eric's original
> patch.)  What path were they expected to go in by?

I was hoping Al Viro would pick it up in the vfs tree, or at least
review the patch.  With a backup possibility that Andrew Morton might.

Given that the merge window is likely to open in next couple of days
I plan to resubmit this after the merge window closes. 

I don't have a clue what happened to the archives that day but it does
look like there was a significant mail hiccup as the number of messages
archived are way down.

Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files
  2012-05-12 11:40   ` Pavel Emelyanov
@ 2012-05-26 15:14     ` Eric W. Biederman
  0 siblings, 0 replies; 10+ messages in thread
From: Eric W. Biederman @ 2012-05-26 15:14 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: Daniel Lezcano, Linux Kernel Mailing List, Serge E. Hallyn,
	Andrew Morton

Pavel Emelyanov <xemul@parallels.com> writes:

> On 05/11/2012 09:07 PM, Eric W. Biederman wrote:
>> Pavel Emelyanov <xemul@parallels.com> writes:
>> 
>>> Some time ago we tried to expose kernel object IDs to the user space to
>>> let it possible to detect shared mm, fs, etc. The namespaces' IDs were
>>> included in this set and Eric proposed, that we'd better expose the ID
>>> in the stat's st_ino field.
>> 
>> A quick question.  With kcmp you have something that is at least in
>> principle usable for checkpoint restart.
>
> Not only in principle. We do use it already ;)
>
>> Are wanting this for checkpoint restart or something else?
>
> For me -- checkpoint restart only. I'm perfectly fine with checking 
> namespaces sharing with kcmp syscall, but you proposed to show ns ID
> in proc inode.

I still think it makes a lot of sense to use stable inode number for
each filesystem.  I am travelling at the moment and I can't find a copy
of your original patch I plan to be home by the end of the week, so
I will have to dig it up then.

>From what I remember the something seemed off with your inode
generation.  From my quick glance earlier it looked a bit wrong.  My
earlier attempt was to essentially export
fs/proc/generic.c:get_inode_number and just have a field in the various
namespaces to store that number.  Plus a method to read that field.
Your patch from my memory seemd to be doing something more than that.

My original plan was to finish setns support before moving on to
stable inode numbers.  But that didn't work out.  I will reevaluate
as I sort out what I need to get done for the next merge window.

I was quite disappointed with the lack of review my mount namespace
patch got from Viro and the other vfs people.  So I think I will
probably have to host a tree myself.  At which point stable
inode support won't be hard. 

It would be good to have kcmp support for namespaces as well as having
an id.  I think the merged functionality in proc_ns_operations is
probably enough for kcmp as is.

Until I have had a few days to play after the merge window closes and
I can't say what I will really be doing but I do want to get this
chunk of work finished, it is definitely time.

Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2012-05-26 15:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-11 12:25 [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Pavel Emelyanov
2012-05-11 12:25 ` [PATCH 2/2] ns: Add proc_ns_operations for mount namespaces Pavel Emelyanov
2012-05-11 17:05   ` Eric W. Biederman
2012-05-12 11:42     ` Pavel Emelyanov
     [not found]   ` <87mx5e5tho.fsf_-_@xmission.com>
2012-05-12 11:41     ` [PATCH] vfs: Add setns support for the mount namespace Pavel Emelyanov
2012-05-18 19:44       ` Serge E. Hallyn
2012-05-18 22:47         ` Eric W. Biederman
2012-05-11 17:07 ` [PATCH 1/2] proc: Show ns-based inode numbers for /proc/pid/ns/* files Eric W. Biederman
2012-05-12 11:40   ` Pavel Emelyanov
2012-05-26 15:14     ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.