linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
@ 2013-08-27 21:44 Eric W. Biederman
       [not found] ` <878uzmhkqg.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-08-27 21:44 UTC (permalink / raw)
  To: Linux Containers
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA


Rely on the fact that another flavor of the filesystem is already
mounted and do not rely on state in the user namespace.

Verify that the mounted filesystem is not covered in any significant
way.  I would love to verify that the previously mounted filesystem
has no mounts on top but there are at least the directories
/proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
for other filesystems to mount on top of.

Refactor the test into a function named fs_fully_visible and call that
function from the mount routines of proc and sysfs.  This makes this
test local to the filesystems involved and the results current of when
the mounts take place, removing a weird threading of the user
namespace, the mount namespace and the filesystems themselves.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
 fs/proc/root.c                 |    7 +++++--
 fs/sysfs/mount.c               |    3 ++-
 include/linux/fs.h             |    1 +
 include/linux/user_namespace.h |    4 ----
 kernel/user.c                  |    2 --
 kernel/user_namespace.c        |    2 --
 7 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 64627f8..877e427 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2867,25 +2867,38 @@ bool current_chrooted(void)
 	return chrooted;
 }
 
-void update_mnt_policy(struct user_namespace *userns)
+bool fs_fully_visible(struct file_system_type *type)
 {
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
 	struct mount *mnt;
+	bool visible = false;
 
-	down_read(&namespace_sem);
+	if (unlikely(!ns))
+		return false;
+
+	namespace_lock();
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
-		switch (mnt->mnt.mnt_sb->s_magic) {
-		case SYSFS_MAGIC:
-			userns->may_mount_sysfs = true;
-			break;
-		case PROC_SUPER_MAGIC:
-			userns->may_mount_proc = true;
-			break;
+		struct mount *child;
+		if (mnt->mnt.mnt_sb->s_type != type)
+			continue;
+
+		/* This mount is not fully visible if there are any child mounts
+		 * that cover anything except for empty directories.
+		 */
+		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+			struct inode *inode = child->mnt_mountpoint->d_inode;
+			if (!S_ISDIR(inode->i_mode))
+				goto next;
+			if (inode->i_nlink != 2)
+				goto next;
 		}
-		if (userns->may_mount_sysfs && userns->may_mount_proc)
-			break;
+		visible = true;
+		goto found;
+	next:	;
 	}
-	up_read(&namespace_sem);
+found:
+	namespace_unlock();
+	return visible;
 }
 
 static void *mntns_get(struct task_struct *task)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 38bd5d4..45e5fb7 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,8 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 		ns = task_active_pid_ns(current);
 		options = data;
 
-		if (!current_user_ns()->may_mount_proc ||
-		    !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+			return ERR_PTR(-EPERM);
+
+		/* Does the mounter have privilege over the pid namespace? */
+		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
 			return ERR_PTR(-EPERM);
 	}
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index afd8327..4a2da3a 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -112,7 +112,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	struct super_block *sb;
 	int error;
 
-	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+	if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
+	    !fs_fully_visible(fs_type))
 		return ERR_PTR(-EPERM);
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9818747..3050c62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1897,6 +1897,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
+extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f..4ce0093 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,8 +26,6 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
-	bool			may_mount_sysfs;
-	bool			may_mount_proc;
 };
 
 extern struct user_namespace init_user_ns;
@@ -84,6 +82,4 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
-void update_mnt_policy(struct user_namespace *userns);
-
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user.c b/kernel/user.c
index 69b4c3d..5bbb919 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
-	.may_mount_sysfs = true,
-	.may_mount_proc = true,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d8c30db..d58ad1e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -97,8 +97,6 @@ int create_user_ns(struct cred *new)
 
 	set_cred_user_ns(new, ns);
 
-	update_mnt_policy(ns);
-
 	return 0;
 }
 
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs
       [not found] ` <878uzmhkqg.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2013-08-27 21:46   ` Eric W. Biederman
       [not found]     ` <874naahkng.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2013-08-27 21:47   ` [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted Andy Lutomirski
  2013-11-02  6:06   ` Gao feng
  2 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-08-27 21:46 UTC (permalink / raw)
  To: Linux Containers
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Greg Kroah-Hartman,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA


Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
over the net namespace.  The principle here is if you create or have
capabilities over it you can mount it, otherwise you get to live with
what other people have mounted.

Instead of testing this with a straight forward ns_capable call,
perform this check the long and torturous way with kobject helpers,
this keeps direct knowledge of namespaces out of sysfs, and preserves
the existing sysfs abstractions.

Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 fs/sysfs/mount.c           |   12 +++++++++---
 include/linux/kobject_ns.h |    2 ++
 lib/kobject.c              |   12 ++++++++++++
 net/core/net-sysfs.c       |    8 ++++++++
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 4a2da3a..8c69ef4 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -112,9 +112,15 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	struct super_block *sb;
 	int error;
 
-	if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
-	    !fs_fully_visible(fs_type))
-		return ERR_PTR(-EPERM);
+	if (!(flags & MS_KERNMOUNT)) {
+		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+			return ERR_PTR(-EPERM);
+
+		for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
+			if (!kobj_ns_current_may_mount(type))
+				return ERR_PTR(-EPERM);
+		}
+	}
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index f66b065..df32d25 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -39,6 +39,7 @@ enum kobj_ns_type {
  */
 struct kobj_ns_type_operations {
 	enum kobj_ns_type type;
+	bool (*current_may_mount)(void);
 	void *(*grab_current_ns)(void);
 	const void *(*netlink_ns)(struct sock *sk);
 	const void *(*initial_ns)(void);
@@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
 const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
 const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
 
+bool kobj_ns_current_may_mount(enum kobj_ns_type type);
 void *kobj_ns_grab_current(enum kobj_ns_type type);
 const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
 const void *kobj_ns_initial(enum kobj_ns_type type);
diff --git a/lib/kobject.c b/lib/kobject.c
index 4a1f33d..1853bd9 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -915,6 +915,18 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
 	return kobj_child_ns_ops(kobj->parent);
 }
 
+bool kobj_ns_current_may_mount(enum kobj_ns_type type)
+{
+	bool may_mount = false;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		may_mount = kobj_ns_ops_tbl[type]->current_may_mount();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return may_mount;
+}
 
 void *kobj_ns_grab_current(enum kobj_ns_type type)
 {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed3..0b0ff95 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1157,6 +1157,13 @@ static void remove_queue_kobjects(struct net_device *net)
 #endif
 }
 
+static bool net_current_may_mount(void)
+{
+	struct net *net = current->nsproxy->net_ns;
+
+	return !ns_capable(net->user_ns, CAP_SYS_ADMIN);
+}
+
 static void *net_grab_current_ns(void)
 {
 	struct net *ns = current->nsproxy->net_ns;
@@ -1179,6 +1186,7 @@ static const void *net_netlink_ns(struct sock *sk)
 
 struct kobj_ns_type_operations net_ns_type_operations = {
 	.type = KOBJ_NS_TYPE_NET,
+	.current_may_mount = net_current_may_mount,
 	.grab_current_ns = net_grab_current_ns,
 	.netlink_ns = net_netlink_ns,
 	.initial_ns = net_initial_ns,
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found] ` <878uzmhkqg.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2013-08-27 21:46   ` [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs Eric W. Biederman
@ 2013-08-27 21:47   ` Andy Lutomirski
       [not found]     ` <CALCETrWPDzuoaJp2ko5jAbwYUBqSdPjvO5uGo-gZVsS4Wm1PKQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2013-11-02  6:06   ` Gao feng
  2 siblings, 1 reply; 22+ messages in thread
From: Andy Lutomirski @ 2013-08-27 21:47 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux FS Devel, Linux Containers, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 27, 2013 at 2:44 PM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>
> Rely on the fact that another flavor of the filesystem is already
> mounted and do not rely on state in the user namespace.

Possibly dumb question: does this check whether the pre-existing mount
has hidepid set?

--Andy

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]     ` <CALCETrWPDzuoaJp2ko5jAbwYUBqSdPjvO5uGo-gZVsS4Wm1PKQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-08-27 21:57       ` Eric W. Biederman
  2013-09-01  4:45         ` Eric W. Biederman
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-08-27 21:57 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linux FS Devel, Linux Containers, linux-kernel-u79uwXL29TY76Z2rM5mHXA

Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:

> On Tue, Aug 27, 2013 at 2:44 PM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>
>> Rely on the fact that another flavor of the filesystem is already
>> mounted and do not rely on state in the user namespace.
>
> Possibly dumb question: does this check whether the pre-existing mount
> has hidepid set?

Not currently. 

It may be worth doing something with respect to hidepid.  I forget what
hidepid tries to do, and I need to dash.  But feel free to cook up a
follow on patch.

My goal is simply to reduce the hack level and increase the readability
and maintainability of the code.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs
       [not found]     ` <874naahkng.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2013-08-28 19:00       ` Greg Kroah-Hartman
  2013-09-23 10:33       ` James Hogan
  1 sibling, 0 replies; 22+ messages in thread
From: Greg Kroah-Hartman @ 2013-08-28 19:00 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 27, 2013 at 02:46:27PM -0700, Eric W. Biederman wrote:
> 
> Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
> over the net namespace.  The principle here is if you create or have
> capabilities over it you can mount it, otherwise you get to live with
> what other people have mounted.
> 
> Instead of testing this with a straight forward ns_capable call,
> perform this check the long and torturous way with kobject helpers,
> this keeps direct knowledge of namespaces out of sysfs, and preserves
> the existing sysfs abstractions.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Odd, but makes sense.

Acked-by: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
  2013-08-27 21:57       ` Eric W. Biederman
@ 2013-09-01  4:45         ` Eric W. Biederman
       [not found]           ` <87eh99noa0.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-09-01  4:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linux Containers, Serge E. Hallyn, Linux FS Devel, linux-kernel

ebiederm@xmission.com (Eric W. Biederman) writes:

> Andy Lutomirski <luto@amacapital.net> writes:
>
>> On Tue, Aug 27, 2013 at 2:44 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>>
>>> Rely on the fact that another flavor of the filesystem is already
>>> mounted and do not rely on state in the user namespace.
>>
>> Possibly dumb question: does this check whether the pre-existing mount
>> has hidepid set?
>
> Not currently. 
>
> It may be worth doing something with respect to hidepid.  I forget what
> hidepid tries to do, and I need to dash.  But feel free to cook up a
> follow on patch.

So I have thought about this a bit more.

hidepid hides the processes that ptrace_may_access will fail on.

You can only reach the point where an unprivileged mount of a pid
namespace is possible if you have created both a user namespace and a
pid namespace.  Which means the creator of the pid namespace will be
capable of ptracing all of the other processes in the pid namespace
(ignoring setns).

So I don't see a point of worry about hidepid or the hidepid gid on
child pid namespaces.  The cases it is attempting to protecting against
really don't exist.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]           ` <87eh99noa0.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2013-09-03 17:40             ` Andy Lutomirski
  0 siblings, 0 replies; 22+ messages in thread
From: Andy Lutomirski @ 2013-09-03 17:40 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux FS Devel, Linux Containers, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Sat, Aug 31, 2013 at 9:45 PM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org (Eric W. Biederman) writes:
>
>> Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:
>>
>>> On Tue, Aug 27, 2013 at 2:44 PM, Eric W. Biederman
>>> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>>>
>>>> Rely on the fact that another flavor of the filesystem is already
>>>> mounted and do not rely on state in the user namespace.
>>>
>>> Possibly dumb question: does this check whether the pre-existing mount
>>> has hidepid set?
>>
>> Not currently.
>>
>> It may be worth doing something with respect to hidepid.  I forget what
>> hidepid tries to do, and I need to dash.  But feel free to cook up a
>> follow on patch.
>
> So I have thought about this a bit more.
>
> hidepid hides the processes that ptrace_may_access will fail on.
>
> You can only reach the point where an unprivileged mount of a pid
> namespace is possible if you have created both a user namespace and a
> pid namespace.  Which means the creator of the pid namespace will be
> capable of ptracing all of the other processes in the pid namespace
> (ignoring setns).
>
> So I don't see a point of worry about hidepid or the hidepid gid on
> child pid namespaces.  The cases it is attempting to protecting against
> really don't exist.

Fair enough.  I didn't realize that you had to own the pid namespace.

--Andy

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs
       [not found]     ` <874naahkng.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2013-08-28 19:00       ` Greg Kroah-Hartman
@ 2013-09-23 10:33       ` James Hogan
       [not found]         ` <524018EA.9070202-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
  1 sibling, 1 reply; 22+ messages in thread
From: James Hogan @ 2013-09-23 10:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Greg Kroah-Hartman, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA


[-- Attachment #1.1: Type: text/plain, Size: 4416 bytes --]

On 27/08/13 22:46, Eric W. Biederman wrote:
> 
> Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
> over the net namespace.  The principle here is if you create or have
> capabilities over it you can mount it, otherwise you get to live with
> what other people have mounted.
> 
> Instead of testing this with a straight forward ns_capable call,
> perform this check the long and torturous way with kobject helpers,
> this keeps direct knowledge of namespaces out of sysfs, and preserves
> the existing sysfs abstractions.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Hi,

Unfortunately this patch causes problems for me in v3.12-rc1 if
CONFIG_NET=n. Sysfs fails to mount with EPERM. Having networking enabled
seems an odd prerequisite for mounting sysfs :-P.

Cheers
James

> ---
>  fs/sysfs/mount.c           |   12 +++++++++---
>  include/linux/kobject_ns.h |    2 ++
>  lib/kobject.c              |   12 ++++++++++++
>  net/core/net-sysfs.c       |    8 ++++++++
>  4 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
> index 4a2da3a..8c69ef4 100644
> --- a/fs/sysfs/mount.c
> +++ b/fs/sysfs/mount.c
> @@ -112,9 +112,15 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
>  	struct super_block *sb;
>  	int error;
>  
> -	if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
> -	    !fs_fully_visible(fs_type))
> -		return ERR_PTR(-EPERM);
> +	if (!(flags & MS_KERNMOUNT)) {
> +		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
> +			return ERR_PTR(-EPERM);
> +
> +		for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
> +			if (!kobj_ns_current_may_mount(type))
> +				return ERR_PTR(-EPERM);
> +		}
> +	}
>  
>  	info = kzalloc(sizeof(*info), GFP_KERNEL);
>  	if (!info)
> diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
> index f66b065..df32d25 100644
> --- a/include/linux/kobject_ns.h
> +++ b/include/linux/kobject_ns.h
> @@ -39,6 +39,7 @@ enum kobj_ns_type {
>   */
>  struct kobj_ns_type_operations {
>  	enum kobj_ns_type type;
> +	bool (*current_may_mount)(void);
>  	void *(*grab_current_ns)(void);
>  	const void *(*netlink_ns)(struct sock *sk);
>  	const void *(*initial_ns)(void);
> @@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
>  const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
>  const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
>  
> +bool kobj_ns_current_may_mount(enum kobj_ns_type type);
>  void *kobj_ns_grab_current(enum kobj_ns_type type);
>  const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
>  const void *kobj_ns_initial(enum kobj_ns_type type);
> diff --git a/lib/kobject.c b/lib/kobject.c
> index 4a1f33d..1853bd9 100644
> --- a/lib/kobject.c
> +++ b/lib/kobject.c
> @@ -915,6 +915,18 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
>  	return kobj_child_ns_ops(kobj->parent);
>  }
>  
> +bool kobj_ns_current_may_mount(enum kobj_ns_type type)
> +{
> +	bool may_mount = false;
> +
> +	spin_lock(&kobj_ns_type_lock);
> +	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
> +	    kobj_ns_ops_tbl[type])
> +		may_mount = kobj_ns_ops_tbl[type]->current_may_mount();
> +	spin_unlock(&kobj_ns_type_lock);
> +
> +	return may_mount;
> +}
>  
>  void *kobj_ns_grab_current(enum kobj_ns_type type)
>  {
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 981fed3..0b0ff95 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -1157,6 +1157,13 @@ static void remove_queue_kobjects(struct net_device *net)
>  #endif
>  }
>  
> +static bool net_current_may_mount(void)
> +{
> +	struct net *net = current->nsproxy->net_ns;
> +
> +	return !ns_capable(net->user_ns, CAP_SYS_ADMIN);
> +}
> +
>  static void *net_grab_current_ns(void)
>  {
>  	struct net *ns = current->nsproxy->net_ns;
> @@ -1179,6 +1186,7 @@ static const void *net_netlink_ns(struct sock *sk)
>  
>  struct kobj_ns_type_operations net_ns_type_operations = {
>  	.type = KOBJ_NS_TYPE_NET,
> +	.current_may_mount = net_current_may_mount,
>  	.grab_current_ns = net_grab_current_ns,
>  	.netlink_ns = net_netlink_ns,
>  	.initial_ns = net_initial_ns,
> 


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

[-- Attachment #2: Type: text/plain, Size: 205 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linuxfoundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] sysfs: Allow mounting without CONFIG_NET
       [not found]         ` <524018EA.9070202-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
@ 2013-09-23 21:41           ` Eric W. Biederman
       [not found]             ` <87ioxrrzb6.fsf_-_-HxuHnoDHeQZYhcs0q7wBk77fW72O3V7zAL8bYrjMMd8@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-09-23 21:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: James Hogan, Greg Kroah-Hartman, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA


In kobj_ns_current_may_mount the default should be to allow the
mount.  The test is only for a single kobj_ns_type at a time, and unless
there is a reason to prevent it the mounting sysfs should be allowed.
Subsystems that are not registered can't have are not involved so can't
have a reason to prevent mounting sysfs.

This is a bug-fix to:
    commit 7dc5dbc879bd0779924b5132a48b731a0bc04a1e
    Author: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
    Date:   Mon Mar 25 20:07:01 2013 -0700
    
        sysfs: Restrict mounting sysfs
        
        Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
        over the net namespace.  The principle here is if you create or have
        capabilities over it you can mount it, otherwise you get to live with
        what other people have mounted.
        
        Instead of testing this with a straight forward ns_capable call,
        perform this check the long and torturous way with kobject helpers,
        this keeps direct knowledge of namespaces out of sysfs, and preserves
        the existing sysfs abstractions.
        
        Acked-by: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>
        Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

That came in via the userns tree during the 3.12 merge window.
    
Reported-by: James Hogan <james.hogan-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---

 lib/kobject.c |    5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lib/kobject.c b/lib/kobject.c
index 9621751..669bf19 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -933,10 +933,7 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
 
 bool kobj_ns_current_may_mount(enum kobj_ns_type type)
 {
-	bool may_mount = false;
-
-	if (type == KOBJ_NS_TYPE_NONE)
-		return true;
+	bool may_mount = true;
 
 	spin_lock(&kobj_ns_type_lock);
 	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] sysfs: Allow mounting without CONFIG_NET
       [not found]             ` <87ioxrrzb6.fsf_-_-HxuHnoDHeQZYhcs0q7wBk77fW72O3V7zAL8bYrjMMd8@public.gmane.org>
@ 2013-09-24 11:25               ` James Hogan
  0 siblings, 0 replies; 22+ messages in thread
From: James Hogan @ 2013-09-24 11:25 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Greg Kroah-Hartman, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linus Torvalds

On 23/09/13 22:41, Eric W. Biederman wrote:
> 
> In kobj_ns_current_may_mount the default should be to allow the
> mount.  The test is only for a single kobj_ns_type at a time, and unless
> there is a reason to prevent it the mounting sysfs should be allowed.
> Subsystems that are not registered can't have are not involved so can't
> have a reason to prevent mounting sysfs.
> 
> This is a bug-fix to:
>     commit 7dc5dbc879bd0779924b5132a48b731a0bc04a1e
>     Author: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>     Date:   Mon Mar 25 20:07:01 2013 -0700
>     
>         sysfs: Restrict mounting sysfs
>         
>         Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
>         over the net namespace.  The principle here is if you create or have
>         capabilities over it you can mount it, otherwise you get to live with
>         what other people have mounted.
>         
>         Instead of testing this with a straight forward ns_capable call,
>         perform this check the long and torturous way with kobject helpers,
>         this keeps direct knowledge of namespaces out of sysfs, and preserves
>         the existing sysfs abstractions.
>         
>         Acked-by: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>
>         Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> 
> That came in via the userns tree during the 3.12 merge window.
>     
> Reported-by: James Hogan <james.hogan-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Tested-by: James Hogan <james.hogan-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>

Thanks
James

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found] ` <878uzmhkqg.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  2013-08-27 21:46   ` [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs Eric W. Biederman
  2013-08-27 21:47   ` [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted Andy Lutomirski
@ 2013-11-02  6:06   ` Gao feng
       [not found]     ` <52749663.2000701-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  2 siblings, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-02  6:06 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

Hi Eric,

On 08/28/2013 05:44 AM, Eric W. Biederman wrote:
> 
> Rely on the fact that another flavor of the filesystem is already
> mounted and do not rely on state in the user namespace.
> 
> Verify that the mounted filesystem is not covered in any significant
> way.  I would love to verify that the previously mounted filesystem
> has no mounts on top but there are at least the directories
> /proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
> for other filesystems to mount on top of.
> 
> Refactor the test into a function named fs_fully_visible and call that
> function from the mount routines of proc and sysfs.  This makes this
> test local to the filesystems involved and the results current of when
> the mounts take place, removing a weird threading of the user
> namespace, the mount namespace and the filesystems themselves.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
>  fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
>  fs/proc/root.c                 |    7 +++++--
>  fs/sysfs/mount.c               |    3 ++-
>  include/linux/fs.h             |    1 +
>  include/linux/user_namespace.h |    4 ----
>  kernel/user.c                  |    2 --
>  kernel/user_namespace.c        |    2 --
>  7 files changed, 33 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 64627f8..877e427 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2867,25 +2867,38 @@ bool current_chrooted(void)
>  	return chrooted;
>  }
>  
> -void update_mnt_policy(struct user_namespace *userns)
> +bool fs_fully_visible(struct file_system_type *type)
>  {
>  	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
>  	struct mount *mnt;
> +	bool visible = false;
>  
> -	down_read(&namespace_sem);
> +	if (unlikely(!ns))
> +		return false;
> +
> +	namespace_lock();
>  	list_for_each_entry(mnt, &ns->list, mnt_list) {
> -		switch (mnt->mnt.mnt_sb->s_magic) {
> -		case SYSFS_MAGIC:
> -			userns->may_mount_sysfs = true;
> -			break;
> -		case PROC_SUPER_MAGIC:
> -			userns->may_mount_proc = true;
> -			break;
> +		struct mount *child;
> +		if (mnt->mnt.mnt_sb->s_type != type)
> +			continue;
> +
> +		/* This mount is not fully visible if there are any child mounts
> +		 * that cover anything except for empty directories.
> +		 */
> +		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
> +			struct inode *inode = child->mnt_mountpoint->d_inode;
> +			if (!S_ISDIR(inode->i_mode))
> +				goto next;
> +			if (inode->i_nlink != 2)
> +				goto next;


I met a problem that proc filesystem failed to mount in user namespace,
The problem is the i_nlink of sysctl entries under proc filesystem is not
2. it always is 1 even it's a directory, see proc_sys_make_inode. and for
btrfs, the i_nlink for an empty dir is 2 too. it seems like depends on the
filesystem itself,not depends on vfs. In my system binfmt_misc is mounted
on /proc/sys/fs/binfmt_misc, and the i_nlink of this directory's inode is
1.

btw, I'm not quite understand what's the inode->i_nlink != 2 here means?
is this directory empty? as I know, when we create a file(not dir) under
a dir, the i_nlink of this dir will not increase.

And another question, it looks like if we don't have proc/sys fs mounted,
then proc/sys will be failed to be mounted?

Thanks!

>  		}
> -		if (userns->may_mount_sysfs && userns->may_mount_proc)
> -			break;
> +		visible = true;
> +		goto found;
> +	next:	;
>  	}
> -	up_read(&namespace_sem);
> +found:
> +	namespace_unlock();
> +	return visible;
>  }
>  
>  static void *mntns_get(struct task_struct *task)
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 38bd5d4..45e5fb7 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -110,8 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
>  		ns = task_active_pid_ns(current);
>  		options = data;
>  
> -		if (!current_user_ns()->may_mount_proc ||
> -		    !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
> +		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
> +			return ERR_PTR(-EPERM);
> +
> +		/* Does the mounter have privilege over the pid namespace? */
> +		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
>  			return ERR_PTR(-EPERM);
>  	}
>  
> diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
> index afd8327..4a2da3a 100644
> --- a/fs/sysfs/mount.c
> +++ b/fs/sysfs/mount.c
> @@ -112,7 +112,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
>  	struct super_block *sb;
>  	int error;
>  
> -	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
> +	if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
> +	    !fs_fully_visible(fs_type))
>  		return ERR_PTR(-EPERM);
>  
>  	info = kzalloc(sizeof(*info), GFP_KERNEL);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9818747..3050c62 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1897,6 +1897,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
>  extern int freeze_super(struct super_block *super);
>  extern int thaw_super(struct super_block *super);
>  extern bool our_mnt(struct vfsmount *mnt);
> +extern bool fs_fully_visible(struct file_system_type *);
>  
>  extern int current_umask(void);
>  
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index b6b215f..4ce0093 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -26,8 +26,6 @@ struct user_namespace {
>  	kuid_t			owner;
>  	kgid_t			group;
>  	unsigned int		proc_inum;
> -	bool			may_mount_sysfs;
> -	bool			may_mount_proc;
>  };
>  
>  extern struct user_namespace init_user_ns;
> @@ -84,6 +82,4 @@ static inline void put_user_ns(struct user_namespace *ns)
>  
>  #endif
>  
> -void update_mnt_policy(struct user_namespace *userns);
> -
>  #endif /* _LINUX_USER_H */
> diff --git a/kernel/user.c b/kernel/user.c
> index 69b4c3d..5bbb919 100644
> --- a/kernel/user.c
> +++ b/kernel/user.c
> @@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
>  	.owner = GLOBAL_ROOT_UID,
>  	.group = GLOBAL_ROOT_GID,
>  	.proc_inum = PROC_USER_INIT_INO,
> -	.may_mount_sysfs = true,
> -	.may_mount_proc = true,
>  };
>  EXPORT_SYMBOL_GPL(init_user_ns);
>  
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index d8c30db..d58ad1e 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -97,8 +97,6 @@ int create_user_ns(struct cred *new)
>  
>  	set_cred_user_ns(new, ns);
>  
> -	update_mnt_policy(ns);
> -
>  	return 0;
>  }
>  
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]     ` <52749663.2000701-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-04  7:00       ` Janne Karhunen
       [not found]         ` <CAE=NcrY+CzX+H4XQTdGj7CSZ98a5T=bNgT6=jGZzcjyaHb-ttw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2013-11-08  2:33       ` Gao feng
  1 sibling, 1 reply; 22+ messages in thread
From: Janne Karhunen @ 2013-11-04  7:00 UTC (permalink / raw)
  To: Gao feng
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Kernel Mailing List,
	Linux Containers, Eric W. Biederman, Andy Lutomirski

On Sat, Nov 2, 2013 at 8:06 AM, Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:

> And another question, it looks like if we don't have proc/sys fs mounted,
> then proc/sys will be failed to be mounted?

I have been wondering the same. Was quite some illogical surprise that
we have to be doing overlay mounts. This is the exact opposite from what
anyone would expect.


-- 
Janne

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]     ` <52749663.2000701-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  2013-11-04  7:00       ` Janne Karhunen
@ 2013-11-08  2:33       ` Gao feng
       [not found]         ` <527C4D88.10907-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  1 sibling, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-08  2:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 11/02/2013 02:06 PM, Gao feng wrote:
> Hi Eric,
> 
> On 08/28/2013 05:44 AM, Eric W. Biederman wrote:
>>
>> Rely on the fact that another flavor of the filesystem is already
>> mounted and do not rely on state in the user namespace.
>>
>> Verify that the mounted filesystem is not covered in any significant
>> way.  I would love to verify that the previously mounted filesystem
>> has no mounts on top but there are at least the directories
>> /proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
>> for other filesystems to mount on top of.
>>
>> Refactor the test into a function named fs_fully_visible and call that
>> function from the mount routines of proc and sysfs.  This makes this
>> test local to the filesystems involved and the results current of when
>> the mounts take place, removing a weird threading of the user
>> namespace, the mount namespace and the filesystems themselves.
>>
>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>> ---
>>  fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
>>  fs/proc/root.c                 |    7 +++++--
>>  fs/sysfs/mount.c               |    3 ++-
>>  include/linux/fs.h             |    1 +
>>  include/linux/user_namespace.h |    4 ----
>>  kernel/user.c                  |    2 --
>>  kernel/user_namespace.c        |    2 --
>>  7 files changed, 33 insertions(+), 23 deletions(-)
>>
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index 64627f8..877e427 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -2867,25 +2867,38 @@ bool current_chrooted(void)
>>  	return chrooted;
>>  }
>>  
>> -void update_mnt_policy(struct user_namespace *userns)
>> +bool fs_fully_visible(struct file_system_type *type)
>>  {
>>  	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
>>  	struct mount *mnt;
>> +	bool visible = false;
>>  
>> -	down_read(&namespace_sem);
>> +	if (unlikely(!ns))
>> +		return false;
>> +
>> +	namespace_lock();
>>  	list_for_each_entry(mnt, &ns->list, mnt_list) {
>> -		switch (mnt->mnt.mnt_sb->s_magic) {
>> -		case SYSFS_MAGIC:
>> -			userns->may_mount_sysfs = true;
>> -			break;
>> -		case PROC_SUPER_MAGIC:
>> -			userns->may_mount_proc = true;
>> -			break;
>> +		struct mount *child;
>> +		if (mnt->mnt.mnt_sb->s_type != type)
>> +			continue;
>> +
>> +		/* This mount is not fully visible if there are any child mounts
>> +		 * that cover anything except for empty directories.
>> +		 */
>> +		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
>> +			struct inode *inode = child->mnt_mountpoint->d_inode;
>> +			if (!S_ISDIR(inode->i_mode))
>> +				goto next;
>> +			if (inode->i_nlink != 2)
>> +				goto next;
> 
> 
> I met a problem that proc filesystem failed to mount in user namespace,
> The problem is the i_nlink of sysctl entries under proc filesystem is not
> 2. it always is 1 even it's a directory, see proc_sys_make_inode. and for
> btrfs, the i_nlink for an empty dir is 2 too. it seems like depends on the
> filesystem itself,not depends on vfs. In my system binfmt_misc is mounted
> on /proc/sys/fs/binfmt_misc, and the i_nlink of this directory's inode is
> 1.
> 
> btw, I'm not quite understand what's the inode->i_nlink != 2 here means?
> is this directory empty? as I know, when we create a file(not dir) under
> a dir, the i_nlink of this dir will not increase.
> 
> And another question, it looks like if we don't have proc/sys fs mounted,
> then proc/sys will be failed to be mounted?
> 

Any Idea?? or should we need to revert this patch??

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]         ` <CAE=NcrY+CzX+H4XQTdGj7CSZ98a5T=bNgT6=jGZzcjyaHb-ttw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-11-09  5:22           ` Eric W. Biederman
  0 siblings, 0 replies; 22+ messages in thread
From: Eric W. Biederman @ 2013-11-09  5:22 UTC (permalink / raw)
  To: Janne Karhunen
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Kernel Mailing List,
	Linux Containers, Andy Lutomirski

Janne Karhunen <janne.karhunen-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> On Sat, Nov 2, 2013 at 8:06 AM, Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>
>> And another question, it looks like if we don't have proc/sys fs mounted,
>> then proc/sys will be failed to be mounted?
>
> I have been wondering the same. Was quite some illogical surprise that
> we have to be doing overlay mounts. This is the exact opposite from what
> anyone would expect.

Before I address the question of bugs I will answer the question of
semantics.

In weird cases like chroot jails it is desirable not to mount /sys and /proc
and if root sets that policy it would be unfortunate if user namespaces
overrode the policy.  It limits what an attacker can accomplish.

So yes in the case of /proc and /sys the goal is to limit you to
functionality you could have had with bind mounts.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]         ` <527C4D88.10907-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-09  5:42           ` Eric W. Biederman
       [not found]             ` <87k3gigmgj.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-11-09  5:42 UTC (permalink / raw)
  To: Gao feng
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:

> On 11/02/2013 02:06 PM, Gao feng wrote:
>> Hi Eric,
>> 
>> On 08/28/2013 05:44 AM, Eric W. Biederman wrote:
>>>
>>> Rely on the fact that another flavor of the filesystem is already
>>> mounted and do not rely on state in the user namespace.
>>>
>>> Verify that the mounted filesystem is not covered in any significant
>>> way.  I would love to verify that the previously mounted filesystem
>>> has no mounts on top but there are at least the directories
>>> /proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
>>> for other filesystems to mount on top of.
>>>
>>> Refactor the test into a function named fs_fully_visible and call that
>>> function from the mount routines of proc and sysfs.  This makes this
>>> test local to the filesystems involved and the results current of when
>>> the mounts take place, removing a weird threading of the user
>>> namespace, the mount namespace and the filesystems themselves.
>>>
>>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>>> ---
>>>  fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
>>>  fs/proc/root.c                 |    7 +++++--
>>>  fs/sysfs/mount.c               |    3 ++-
>>>  include/linux/fs.h             |    1 +
>>>  include/linux/user_namespace.h |    4 ----
>>>  kernel/user.c                  |    2 --
>>>  kernel/user_namespace.c        |    2 --
>>>  7 files changed, 33 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/fs/namespace.c b/fs/namespace.c
>>> index 64627f8..877e427 100644
>>> --- a/fs/namespace.c
>>> +++ b/fs/namespace.c
>>> @@ -2867,25 +2867,38 @@ bool current_chrooted(void)
>>>  	return chrooted;
>>>  }
>>>  
>>> -void update_mnt_policy(struct user_namespace *userns)
>>> +bool fs_fully_visible(struct file_system_type *type)
>>>  {
>>>  	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
>>>  	struct mount *mnt;
>>> +	bool visible = false;
>>>  
>>> -	down_read(&namespace_sem);
>>> +	if (unlikely(!ns))
>>> +		return false;
>>> +
>>> +	namespace_lock();
>>>  	list_for_each_entry(mnt, &ns->list, mnt_list) {
>>> -		switch (mnt->mnt.mnt_sb->s_magic) {
>>> -		case SYSFS_MAGIC:
>>> -			userns->may_mount_sysfs = true;
>>> -			break;
>>> -		case PROC_SUPER_MAGIC:
>>> -			userns->may_mount_proc = true;
>>> -			break;
>>> +		struct mount *child;
>>> +		if (mnt->mnt.mnt_sb->s_type != type)
>>> +			continue;
>>> +
>>> +		/* This mount is not fully visible if there are any child mounts
>>> +		 * that cover anything except for empty directories.
>>> +		 */
>>> +		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
>>> +			struct inode *inode = child->mnt_mountpoint->d_inode;
>>> +			if (!S_ISDIR(inode->i_mode))
>>> +				goto next;
>>> +			if (inode->i_nlink != 2)
>>> +				goto next;
>> 
>> 
>> I met a problem that proc filesystem failed to mount in user namespace,
>> The problem is the i_nlink of sysctl entries under proc filesystem is not
>> 2. it always is 1 even it's a directory, see proc_sys_make_inode. and for
>> btrfs, the i_nlink for an empty dir is 2 too. it seems like depends on the
>> filesystem itself,not depends on vfs. In my system binfmt_misc is mounted
>> on /proc/sys/fs/binfmt_misc, and the i_nlink of this directory's inode is
>> 1.

Yes. 1 is what filesystems that are too lazy to count the number of
links to a directory return, and /proc/sys is currently such a
filesystem.

Ordinarily nlink == 2 means a directory does not have any subdirectories.

>> btw, I'm not quite understand what's the inode->i_nlink != 2 here means?
>> is this directory empty? as I know, when we create a file(not dir) under
>> a dir, the i_nlink of this dir will not increase.
>> 
>> And another question, it looks like if we don't have proc/sys fs mounted,
>> then proc/sys will be failed to be mounted?
>> 
>
> Any Idea?? or should we need to revert this patch??

The patch is mostly doing what it is supposed to be doing.

Now the code is slightly buggy.  inode->i_nlink will test to see if a
directory has subdirectories but it won't test to see if a directory is
empty.  Where did my brain go when I was writing that test?

Right now I would rather not have the empty directory exception than
remove this code.

The test is a little trickier to write than it might otherwise be
because /proc and /sys tend to be slightly imperfect filesystems.

I think the only way to really test that is to call readdir on the
directory itself :(  I don't like that thought.

I don't know what I was thinking when I wrote that test but I definitely
goofed up.  Grr!

I can certainly filter out any directory with nlink > 2.  That would be
an easy partial step forward.

The real question though is how do I detect directories it is safe to
mount on where there will not be files in them.  I can't call iterate
with the namespace_lock held so things are a bit tricky.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]             ` <87k3gigmgj.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2013-11-13  7:26               ` Gao feng
       [not found]                 ` <5283299B.8080702-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-13  7:26 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
> Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:
> 
>> On 11/02/2013 02:06 PM, Gao feng wrote:
>>> Hi Eric,
>>>
>>> On 08/28/2013 05:44 AM, Eric W. Biederman wrote:
>>>>
>>>> Rely on the fact that another flavor of the filesystem is already
>>>> mounted and do not rely on state in the user namespace.
>>>>
>>>> Verify that the mounted filesystem is not covered in any significant
>>>> way.  I would love to verify that the previously mounted filesystem
>>>> has no mounts on top but there are at least the directories
>>>> /proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
>>>> for other filesystems to mount on top of.
>>>>
>>>> Refactor the test into a function named fs_fully_visible and call that
>>>> function from the mount routines of proc and sysfs.  This makes this
>>>> test local to the filesystems involved and the results current of when
>>>> the mounts take place, removing a weird threading of the user
>>>> namespace, the mount namespace and the filesystems themselves.
>>>>
>>>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>>>> ---
>>>>  fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
>>>>  fs/proc/root.c                 |    7 +++++--
>>>>  fs/sysfs/mount.c               |    3 ++-
>>>>  include/linux/fs.h             |    1 +
>>>>  include/linux/user_namespace.h |    4 ----
>>>>  kernel/user.c                  |    2 --
>>>>  kernel/user_namespace.c        |    2 --
>>>>  7 files changed, 33 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/fs/namespace.c b/fs/namespace.c
>>>> index 64627f8..877e427 100644
>>>> --- a/fs/namespace.c
>>>> +++ b/fs/namespace.c
>>>> @@ -2867,25 +2867,38 @@ bool current_chrooted(void)
>>>>  	return chrooted;
>>>>  }
>>>>  
>>>> -void update_mnt_policy(struct user_namespace *userns)
>>>> +bool fs_fully_visible(struct file_system_type *type)
>>>>  {
>>>>  	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
>>>>  	struct mount *mnt;
>>>> +	bool visible = false;
>>>>  
>>>> -	down_read(&namespace_sem);
>>>> +	if (unlikely(!ns))
>>>> +		return false;
>>>> +
>>>> +	namespace_lock();
>>>>  	list_for_each_entry(mnt, &ns->list, mnt_list) {
>>>> -		switch (mnt->mnt.mnt_sb->s_magic) {
>>>> -		case SYSFS_MAGIC:
>>>> -			userns->may_mount_sysfs = true;
>>>> -			break;
>>>> -		case PROC_SUPER_MAGIC:
>>>> -			userns->may_mount_proc = true;
>>>> -			break;
>>>> +		struct mount *child;
>>>> +		if (mnt->mnt.mnt_sb->s_type != type)
>>>> +			continue;
>>>> +
>>>> +		/* This mount is not fully visible if there are any child mounts
>>>> +		 * that cover anything except for empty directories.
>>>> +		 */
>>>> +		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
>>>> +			struct inode *inode = child->mnt_mountpoint->d_inode;
>>>> +			if (!S_ISDIR(inode->i_mode))
>>>> +				goto next;
>>>> +			if (inode->i_nlink != 2)
>>>> +				goto next;
>>>
>>>
>>> I met a problem that proc filesystem failed to mount in user namespace,
>>> The problem is the i_nlink of sysctl entries under proc filesystem is not
>>> 2. it always is 1 even it's a directory, see proc_sys_make_inode. and for
>>> btrfs, the i_nlink for an empty dir is 2 too. it seems like depends on the
>>> filesystem itself,not depends on vfs. In my system binfmt_misc is mounted
>>> on /proc/sys/fs/binfmt_misc, and the i_nlink of this directory's inode is
>>> 1.
> 
> Yes. 1 is what filesystems that are too lazy to count the number of
> links to a directory return, and /proc/sys is currently such a
> filesystem.
> 
> Ordinarily nlink == 2 means a directory does not have any subdirectories.
> 
>>> btw, I'm not quite understand what's the inode->i_nlink != 2 here means?
>>> is this directory empty? as I know, when we create a file(not dir) under
>>> a dir, the i_nlink of this dir will not increase.
>>>
>>> And another question, it looks like if we don't have proc/sys fs mounted,
>>> then proc/sys will be failed to be mounted?
>>>
>>
>> Any Idea?? or should we need to revert this patch??
> 
> The patch is mostly doing what it is supposed to be doing.
> 
> Now the code is slightly buggy.  inode->i_nlink will test to see if a
> directory has subdirectories but it won't test to see if a directory is
> empty.  Where did my brain go when I was writing that test?
> 
> Right now I would rather not have the empty directory exception than
> remove this code.
> 
> The test is a little trickier to write than it might otherwise be
> because /proc and /sys tend to be slightly imperfect filesystems.
> 
> I think the only way to really test that is to call readdir on the
> directory itself :(  I don't like that thought.
> 
> I don't know what I was thinking when I wrote that test but I definitely
> goofed up.  Grr!
> 
> I can certainly filter out any directory with nlink > 2.  That would be
> an easy partial step forward.
> 
> The real question though is how do I detect directories it is safe to
> mount on where there will not be files in them.  I can't call iterate
> with the namespace_lock held so things are a bit tricky.
> 

I know this problem is not easy to be resolved. why not let the user
make the decision?  maybe we can introduce a new mount option MS_LOCK,
if user wants to use mount to hide something, he should use mount with
option MS_LOCK. so the unpriviged user can't umount this filesystem and
fail to mount the filesystem if one of it's child mount is mounted with
MS_LOCK option otherwise he use MS_REC too.

Thanks

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]                 ` <5283299B.8080702-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-14 11:10                   ` Gao feng
       [not found]                     ` <5284AF90.7060506-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-14 11:10 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Linux Containers,
	Andy Lutomirski, linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 11/13/2013 03:26 PM, Gao feng wrote:
> On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
>> Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:
>>
>>> On 11/02/2013 02:06 PM, Gao feng wrote:
>>>> Hi Eric,
>>>>
>>>> On 08/28/2013 05:44 AM, Eric W. Biederman wrote:
>>>>>
>>>>> Rely on the fact that another flavor of the filesystem is already
>>>>> mounted and do not rely on state in the user namespace.
>>>>>
>>>>> Verify that the mounted filesystem is not covered in any significant
>>>>> way.  I would love to verify that the previously mounted filesystem
>>>>> has no mounts on top but there are at least the directories
>>>>> /proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
>>>>> for other filesystems to mount on top of.
>>>>>
>>>>> Refactor the test into a function named fs_fully_visible and call that
>>>>> function from the mount routines of proc and sysfs.  This makes this
>>>>> test local to the filesystems involved and the results current of when
>>>>> the mounts take place, removing a weird threading of the user
>>>>> namespace, the mount namespace and the filesystems themselves.
>>>>>
>>>>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>>>>> ---
>>>>>  fs/namespace.c                 |   37 +++++++++++++++++++++++++------------
>>>>>  fs/proc/root.c                 |    7 +++++--
>>>>>  fs/sysfs/mount.c               |    3 ++-
>>>>>  include/linux/fs.h             |    1 +
>>>>>  include/linux/user_namespace.h |    4 ----
>>>>>  kernel/user.c                  |    2 --
>>>>>  kernel/user_namespace.c        |    2 --
>>>>>  7 files changed, 33 insertions(+), 23 deletions(-)
>>>>>
>>>>> diff --git a/fs/namespace.c b/fs/namespace.c
>>>>> index 64627f8..877e427 100644
>>>>> --- a/fs/namespace.c
>>>>> +++ b/fs/namespace.c
>>>>> @@ -2867,25 +2867,38 @@ bool current_chrooted(void)
>>>>>  	return chrooted;
>>>>>  }
>>>>>  
>>>>> -void update_mnt_policy(struct user_namespace *userns)
>>>>> +bool fs_fully_visible(struct file_system_type *type)
>>>>>  {
>>>>>  	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
>>>>>  	struct mount *mnt;
>>>>> +	bool visible = false;
>>>>>  
>>>>> -	down_read(&namespace_sem);
>>>>> +	if (unlikely(!ns))
>>>>> +		return false;
>>>>> +
>>>>> +	namespace_lock();
>>>>>  	list_for_each_entry(mnt, &ns->list, mnt_list) {
>>>>> -		switch (mnt->mnt.mnt_sb->s_magic) {
>>>>> -		case SYSFS_MAGIC:
>>>>> -			userns->may_mount_sysfs = true;
>>>>> -			break;
>>>>> -		case PROC_SUPER_MAGIC:
>>>>> -			userns->may_mount_proc = true;
>>>>> -			break;
>>>>> +		struct mount *child;
>>>>> +		if (mnt->mnt.mnt_sb->s_type != type)
>>>>> +			continue;
>>>>> +
>>>>> +		/* This mount is not fully visible if there are any child mounts
>>>>> +		 * that cover anything except for empty directories.
>>>>> +		 */
>>>>> +		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
>>>>> +			struct inode *inode = child->mnt_mountpoint->d_inode;
>>>>> +			if (!S_ISDIR(inode->i_mode))
>>>>> +				goto next;
>>>>> +			if (inode->i_nlink != 2)
>>>>> +				goto next;
>>>>
>>>>
>>>> I met a problem that proc filesystem failed to mount in user namespace,
>>>> The problem is the i_nlink of sysctl entries under proc filesystem is not
>>>> 2. it always is 1 even it's a directory, see proc_sys_make_inode. and for
>>>> btrfs, the i_nlink for an empty dir is 2 too. it seems like depends on the
>>>> filesystem itself,not depends on vfs. In my system binfmt_misc is mounted
>>>> on /proc/sys/fs/binfmt_misc, and the i_nlink of this directory's inode is
>>>> 1.
>>
>> Yes. 1 is what filesystems that are too lazy to count the number of
>> links to a directory return, and /proc/sys is currently such a
>> filesystem.
>>
>> Ordinarily nlink == 2 means a directory does not have any subdirectories.
>>
>>>> btw, I'm not quite understand what's the inode->i_nlink != 2 here means?
>>>> is this directory empty? as I know, when we create a file(not dir) under
>>>> a dir, the i_nlink of this dir will not increase.
>>>>
>>>> And another question, it looks like if we don't have proc/sys fs mounted,
>>>> then proc/sys will be failed to be mounted?
>>>>
>>>
>>> Any Idea?? or should we need to revert this patch??
>>
>> The patch is mostly doing what it is supposed to be doing.
>>
>> Now the code is slightly buggy.  inode->i_nlink will test to see if a
>> directory has subdirectories but it won't test to see if a directory is
>> empty.  Where did my brain go when I was writing that test?
>>
>> Right now I would rather not have the empty directory exception than
>> remove this code.
>>
>> The test is a little trickier to write than it might otherwise be
>> because /proc and /sys tend to be slightly imperfect filesystems.
>>
>> I think the only way to really test that is to call readdir on the
>> directory itself :(  I don't like that thought.
>>
>> I don't know what I was thinking when I wrote that test but I definitely
>> goofed up.  Grr!
>>
>> I can certainly filter out any directory with nlink > 2.  That would be
>> an easy partial step forward.
>>
>> The real question though is how do I detect directories it is safe to
>> mount on where there will not be files in them.  I can't call iterate
>> with the namespace_lock held so things are a bit tricky.
>>
> 
> I know this problem is not easy to be resolved. why not let the user
> make the decision?  maybe we can introduce a new mount option MS_LOCK,
> if user wants to use mount to hide something, he should use mount with
> option MS_LOCK. so the unpriviged user can't umount this filesystem and
> fail to mount the filesystem if one of it's child mount is mounted with
> MS_LOCK option otherwise he use MS_REC too.
> 

Something like this.

>From 437f33ea366623c7a9d557b2e84cae424876a44f Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
Date: Wed, 13 Nov 2013 16:06:46 +0800
Subject: [PATCH] userns: introduce new mount option MS_LOCK

After commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
vfs: Lock in place mounts from more privileged users,
in userns, the mounts of child mntns which copied from
parent mntns is locked and user has no rights to umount/move
them, it's too strict.

The core purpose of above commit is trying to prevent
unprivileged user from accessing files hidden by mount.
This patch introduces a new mount option MS_LOCK, this
gives user the capable to mount filesystem as the type
of lock if he wants to use mount to hide something.

And this patch also makes it possible to resolve the
problem brought by commit e51db73532955dc5eaba4235e62b74b460709d5b,
the i_nlink is incorrect in some filesystems.

Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 fs/namespace.c          | 10 ++++------
 fs/proc_namespace.c     |  1 +
 include/uapi/linux/fs.h |  1 +
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 7097fc7..8e6b3e8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -830,10 +830,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
 		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

-	/* Don't allow unprivileged users to reveal what is under a mount */
-	if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
-		mnt->mnt.mnt_flags |= MNT_LOCKED;
-
 	atomic_inc(&sb->s_active);
 	mnt->mnt.mnt_sb = sb;
 	mnt->mnt.mnt_root = dget(root);
@@ -2359,10 +2355,12 @@ long do_mount(const char *dev_name, const char *dir_name,
 		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
 	if (flags & MS_RDONLY)
 		mnt_flags |= MNT_READONLY;
+	if (flags & MS_LOCK)
+		mnt_flags |= MNT_LOCKED;

 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
-		   MS_STRICTATIME);
+		   MS_STRICTATIME | MS_LOCK);

 	if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
@@ -2894,7 +2892,7 @@ bool fs_fully_visible(struct file_system_type *type)
 			struct inode *inode = child->mnt_mountpoint->d_inode;
 			if (!S_ISDIR(inode->i_mode))
 				goto next;
-			if (inode->i_nlink != 2)
+			if (!may_mount_lock(child))
 				goto next;
 		}
 		visible = true;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5fe34c3..fc9b15c 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -65,6 +65,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
 		{ MNT_NOATIME, ",noatime" },
 		{ MNT_NODIRATIME, ",nodiratime" },
 		{ MNT_RELATIME, ",relatime" },
+		{ MNT_LOCKED, ",locked" },
 		{ 0, NULL }
 	};
 	const struct proc_fs_info *fs_infop;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 6c28b61..731d894 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -69,6 +69,7 @@ struct inodes_stat_t {
 #define MS_REMOUNT	32	/* Alter flags of a mounted FS */
 #define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
 #define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_LOCK		512	/* Disallow unpriviged user to umount FS. */
 #define MS_NOATIME	1024	/* Do not update access times. */
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]                     ` <5284AF90.7060506-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-14 16:54                       ` Andy Lutomirski
  2013-11-15  1:16                         ` Gao feng
  0 siblings, 1 reply; 22+ messages in thread
From: Andy Lutomirski @ 2013-11-14 16:54 UTC (permalink / raw)
  To: Gao feng
  Cc: Linux FS Devel, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	Linux Containers, Eric W. Biederman

On Thu, Nov 14, 2013 at 3:10 AM, Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
> On 11/13/2013 03:26 PM, Gao feng wrote:
>> On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
>>> Right now I would rather not have the empty directory exception than
>>> remove this code.
>>>
>>> The test is a little trickier to write than it might otherwise be
>>> because /proc and /sys tend to be slightly imperfect filesystems.
>>>
>>> I think the only way to really test that is to call readdir on the
>>> directory itself :(  I don't like that thought.
>>>
>>> I don't know what I was thinking when I wrote that test but I definitely
>>> goofed up.  Grr!
>>>
>>> I can certainly filter out any directory with nlink > 2.  That would be
>>> an easy partial step forward.
>>>
>>> The real question though is how do I detect directories it is safe to
>>> mount on where there will not be files in them.  I can't call iterate
>>> with the namespace_lock held so things are a bit tricky.
>>>
>>
>> I know this problem is not easy to be resolved. why not let the user
>> make the decision?  maybe we can introduce a new mount option MS_LOCK,
>> if user wants to use mount to hide something, he should use mount with
>> option MS_LOCK. so the unpriviged user can't umount this filesystem and
>> fail to mount the filesystem if one of it's child mount is mounted with
>> MS_LOCK option otherwise he use MS_REC too.
>>
>
> Something like this.
>
> From 437f33ea366623c7a9d557b2e84cae424876a44f Mon Sep 17 00:00:00 2001
> From: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
> Date: Wed, 13 Nov 2013 16:06:46 +0800
> Subject: [PATCH] userns: introduce new mount option MS_LOCK
>
> After commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
> vfs: Lock in place mounts from more privileged users,
> in userns, the mounts of child mntns which copied from
> parent mntns is locked and user has no rights to umount/move
> them, it's too strict.
>
> The core purpose of above commit is trying to prevent
> unprivileged user from accessing files hidden by mount.
> This patch introduces a new mount option MS_LOCK, this
> gives user the capable to mount filesystem as the type
> of lock if he wants to use mount to hide something.
>

This is bad -- if something was secure in old kernels, it needs to
stay secure.  If you had MS_NOT_A_LOCK, that would be okay, but it
might not solve your problem.

--Andy

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
  2013-11-14 16:54                       ` Andy Lutomirski
@ 2013-11-15  1:16                         ` Gao feng
       [not found]                           ` <528575EC.2030309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-15  1:16 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Eric W. Biederman, Linux Containers, Serge E. Hallyn,
	Linux FS Devel, linux-kernel

On 11/15/2013 12:54 AM, Andy Lutomirski wrote:
> On Thu, Nov 14, 2013 at 3:10 AM, Gao feng <gaofeng@cn.fujitsu.com> wrote:
>> On 11/13/2013 03:26 PM, Gao feng wrote:
>>> On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
>>>> Right now I would rather not have the empty directory exception than
>>>> remove this code.
>>>>
>>>> The test is a little trickier to write than it might otherwise be
>>>> because /proc and /sys tend to be slightly imperfect filesystems.
>>>>
>>>> I think the only way to really test that is to call readdir on the
>>>> directory itself :(  I don't like that thought.
>>>>
>>>> I don't know what I was thinking when I wrote that test but I definitely
>>>> goofed up.  Grr!
>>>>
>>>> I can certainly filter out any directory with nlink > 2.  That would be
>>>> an easy partial step forward.
>>>>
>>>> The real question though is how do I detect directories it is safe to
>>>> mount on where there will not be files in them.  I can't call iterate
>>>> with the namespace_lock held so things are a bit tricky.
>>>>
>>>
>>> I know this problem is not easy to be resolved. why not let the user
>>> make the decision?  maybe we can introduce a new mount option MS_LOCK,
>>> if user wants to use mount to hide something, he should use mount with
>>> option MS_LOCK. so the unpriviged user can't umount this filesystem and
>>> fail to mount the filesystem if one of it's child mount is mounted with
>>> MS_LOCK option otherwise he use MS_REC too.
>>>
>>
>> Something like this.
>>
>> From 437f33ea366623c7a9d557b2e84cae424876a44f Mon Sep 17 00:00:00 2001
>> From: Gao feng <gaofeng@cn.fujitsu.com>
>> Date: Wed, 13 Nov 2013 16:06:46 +0800
>> Subject: [PATCH] userns: introduce new mount option MS_LOCK
>>
>> After commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
>> vfs: Lock in place mounts from more privileged users,
>> in userns, the mounts of child mntns which copied from
>> parent mntns is locked and user has no rights to umount/move
>> them, it's too strict.
>>
>> The core purpose of above commit is trying to prevent
>> unprivileged user from accessing files hidden by mount.
>> This patch introduces a new mount option MS_LOCK, this
>> gives user the capable to mount filesystem as the type
>> of lock if he wants to use mount to hide something.
>>
> 
> This is bad -- if something was secure in old kernels, it needs to
> stay secure.  If you had MS_NOT_A_LOCK, that would be okay, but it
> might not solve your problem.
> 

what you mean old kernels here? I saw patch "vfs: Lock in place mounts from more privileged users"
is merged into upstream in linux 3.12-rc1, this is not very old. I think there
are not many userspace processes rely on this feature.

If user think host needs to be secure, he should use MS_LOCK to mount filesystem.
we can't make decison for user.

Thanks

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]                           ` <528575EC.2030309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-15  4:54                             ` Eric W. Biederman
       [not found]                               ` <87txfexo25.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Eric W. Biederman @ 2013-11-15  4:54 UTC (permalink / raw)
  To: Gao feng
  Cc: Linux FS Devel, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski

Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:

> On 11/15/2013 12:54 AM, Andy Lutomirski wrote:
>> On Thu, Nov 14, 2013 at 3:10 AM, Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>> On 11/13/2013 03:26 PM, Gao feng wrote:
>>>> On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
>>>>> Right now I would rather not have the empty directory exception than
>>>>> remove this code.
>>>>>
>>>>> The test is a little trickier to write than it might otherwise be
>>>>> because /proc and /sys tend to be slightly imperfect filesystems.
>>>>>
>>>>> I think the only way to really test that is to call readdir on the
>>>>> directory itself :(  I don't like that thought.
>>>>>
>>>>> I don't know what I was thinking when I wrote that test but I definitely
>>>>> goofed up.  Grr!
>>>>>
>>>>> I can certainly filter out any directory with nlink > 2.  That would be
>>>>> an easy partial step forward.
>>>>>
>>>>> The real question though is how do I detect directories it is safe to
>>>>> mount on where there will not be files in them.  I can't call iterate
>>>>> with the namespace_lock held so things are a bit tricky.
>>>>>
>>>>
>>>> I know this problem is not easy to be resolved. why not let the user
>>>> make the decision?  maybe we can introduce a new mount option MS_LOCK,
>>>> if user wants to use mount to hide something, he should use mount with
>>>> option MS_LOCK. so the unpriviged user can't umount this filesystem and
>>>> fail to mount the filesystem if one of it's child mount is mounted with
>>>> MS_LOCK option otherwise he use MS_REC too.
>>>>
>>>
>>> Something like this.
>>>
>>> From 437f33ea366623c7a9d557b2e84cae424876a44f Mon Sep 17 00:00:00 2001
>>> From: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
>>> Date: Wed, 13 Nov 2013 16:06:46 +0800
>>> Subject: [PATCH] userns: introduce new mount option MS_LOCK
>>>
>>> After commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
>>> vfs: Lock in place mounts from more privileged users,
>>> in userns, the mounts of child mntns which copied from
>>> parent mntns is locked and user has no rights to umount/move
>>> them, it's too strict.
>>>
>>> The core purpose of above commit is trying to prevent
>>> unprivileged user from accessing files hidden by mount.
>>> This patch introduces a new mount option MS_LOCK, this
>>> gives user the capable to mount filesystem as the type
>>> of lock if he wants to use mount to hide something.
>>>
>> 
>> This is bad -- if something was secure in old kernels, it needs to
>> stay secure.  If you had MS_NOT_A_LOCK, that would be okay, but it
>> might not solve your problem.
>> 
>
> what you mean old kernels here? I saw patch "vfs: Lock in place mounts from more privileged users"
> is merged into upstream in linux 3.12-rc1, this is not very old. I think there
> are not many userspace processes rely on this feature.

Sort of true.  Most people aren't that silly.  This feature was added to
defend against a theoretical attack that you can use with mount
namespaces.

In particular the scenario we are concerned with is:

Suppose the file system looks like:

Suppose there are two filesystems a and b that look like:

a:/usr/
a:/usr/my_very_secret_file
a:/dev/
a:/etc/
a:/lib/

b:/bin/
b:/etc/
b:/games/
b:/include/
b:/lib/
b:/lib32/
b:/local/
b:/sbin/
b:/share/
b:/src/

And filesystem b is mounted on a:/usr hiding a:/usr/my_very_secret_file

So the filesystem looks like:

/usr/
/usr/bin/
/usr/etc/
/usr/games/
/usr/include/
/usr/lib/
/usr/lib32/
/usr/local/
/usr/sbin/
/usr/share/
/usr/src/
/dev/
/etc/
/lib/

Without locking mounts into place an unprivileged user can clone the
mount namespace and do "umount /usr" and read /usr/my_very_secret_file.

Most systems don't hide sensitive things with mounts but it is very
possible and guarding against is fairly cheap and easy.  And while a
little annoying it should not be a large impediment to unprivileged user
of the user namespace because pivot root still works.

This thread started talking about bugs in fs_fully_visible.  And those
bugs are fixable and I aim to get to them shortly.  At the very least
I can lie and test for nlink <= 2 which fixes the regression in mounting
proc.

Then I can write the fun version that takes references and drops locks
so it can call the internal version of readdir to see if a directory is
actually empty.

But the principle remains the same we really don't want to reveal
anything that is hidden under a mount on purpose or by mistake.  Just
because then we don't have to think about those things from a security
point of view making everyone's life easier.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]                               ` <87txfexo25.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
@ 2013-11-15  6:14                                 ` Gao feng
       [not found]                                   ` <5285BBE2.7010001-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Gao feng @ 2013-11-15  6:14 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux FS Devel, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski

On 11/15/2013 12:54 PM, Eric W. Biederman wrote:
> Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:
> 
>> On 11/15/2013 12:54 AM, Andy Lutomirski wrote:
>>> On Thu, Nov 14, 2013 at 3:10 AM, Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
>>>> On 11/13/2013 03:26 PM, Gao feng wrote:
>>>>> On 11/09/2013 01:42 PM, Eric W. Biederman wrote:
>>>>>> Right now I would rather not have the empty directory exception than
>>>>>> remove this code.
>>>>>>
>>>>>> The test is a little trickier to write than it might otherwise be
>>>>>> because /proc and /sys tend to be slightly imperfect filesystems.
>>>>>>
>>>>>> I think the only way to really test that is to call readdir on the
>>>>>> directory itself :(  I don't like that thought.
>>>>>>
>>>>>> I don't know what I was thinking when I wrote that test but I definitely
>>>>>> goofed up.  Grr!
>>>>>>
>>>>>> I can certainly filter out any directory with nlink > 2.  That would be
>>>>>> an easy partial step forward.
>>>>>>
>>>>>> The real question though is how do I detect directories it is safe to
>>>>>> mount on where there will not be files in them.  I can't call iterate
>>>>>> with the namespace_lock held so things are a bit tricky.
>>>>>>
>>>>>
>>>>> I know this problem is not easy to be resolved. why not let the user
>>>>> make the decision?  maybe we can introduce a new mount option MS_LOCK,
>>>>> if user wants to use mount to hide something, he should use mount with
>>>>> option MS_LOCK. so the unpriviged user can't umount this filesystem and
>>>>> fail to mount the filesystem if one of it's child mount is mounted with
>>>>> MS_LOCK option otherwise he use MS_REC too.
>>>>>
>>>>
>>>> Something like this.
>>>>
>>>> From 437f33ea366623c7a9d557b2e84cae424876a44f Mon Sep 17 00:00:00 2001
>>>> From: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
>>>> Date: Wed, 13 Nov 2013 16:06:46 +0800
>>>> Subject: [PATCH] userns: introduce new mount option MS_LOCK
>>>>
>>>> After commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
>>>> vfs: Lock in place mounts from more privileged users,
>>>> in userns, the mounts of child mntns which copied from
>>>> parent mntns is locked and user has no rights to umount/move
>>>> them, it's too strict.
>>>>
>>>> The core purpose of above commit is trying to prevent
>>>> unprivileged user from accessing files hidden by mount.
>>>> This patch introduces a new mount option MS_LOCK, this
>>>> gives user the capable to mount filesystem as the type
>>>> of lock if he wants to use mount to hide something.
>>>>
>>>
>>> This is bad -- if something was secure in old kernels, it needs to
>>> stay secure.  If you had MS_NOT_A_LOCK, that would be okay, but it
>>> might not solve your problem.
>>>
>>
>> what you mean old kernels here? I saw patch "vfs: Lock in place mounts from more privileged users"
>> is merged into upstream in linux 3.12-rc1, this is not very old. I think there
>> are not many userspace processes rely on this feature.
> 
> Sort of true.  Most people aren't that silly.  This feature was added to
> defend against a theoretical attack that you can use with mount
> namespaces.
> 
> In particular the scenario we are concerned with is:
> 
> Suppose the file system looks like:
> 
> Suppose there are two filesystems a and b that look like:
> 
> a:/usr/
> a:/usr/my_very_secret_file
> a:/dev/
> a:/etc/
> a:/lib/
> 
> b:/bin/
> b:/etc/
> b:/games/
> b:/include/
> b:/lib/
> b:/lib32/
> b:/local/
> b:/sbin/
> b:/share/
> b:/src/
> 
> And filesystem b is mounted on a:/usr hiding a:/usr/my_very_secret_file
> 
> So the filesystem looks like:
> 
> /usr/
> /usr/bin/
> /usr/etc/
> /usr/games/
> /usr/include/
> /usr/lib/
> /usr/lib32/
> /usr/local/
> /usr/sbin/
> /usr/share/
> /usr/src/
> /dev/
> /etc/
> /lib/
> 
> Without locking mounts into place an unprivileged user can clone the
> mount namespace and do "umount /usr" and read /usr/my_very_secret_file.
> 
> Most systems don't hide sensitive things with mounts but it is very
> possible and guarding against is fairly cheap and easy.  And while a
> little annoying it should not be a large impediment to unprivileged user
> of the user namespace because pivot root still works.
> 
> This thread started talking about bugs in fs_fully_visible.  And those
> bugs are fixable and I aim to get to them shortly.  At the very least
> I can lie and test for nlink <= 2 which fixes the regression in mounting
> proc.
> 
> Then I can write the fun version that takes references and drops locks
> so it can call the internal version of readdir to see if a directory is
> actually empty.
> 
> But the principle remains the same we really don't want to reveal
> anything that is hidden under a mount on purpose or by mistake.  Just
> because then we don't have to think about those things from a security
> point of view making everyone's life easier.
> 

Ok,I agree with you that we should make container security by default.

What's your idea that introduces option MS_NOT_A_LOCK just like Andy's
advisement?

In libvirt, host creates dev and devpts directories for container,then
mount devpts, tmpfs on them and create device nodes inside these dirs
for container. and then in container, these filesystems are moved to
container's /dev/ /dev/pts directory. We really have no need to lock
these mounts. they are just created for container.

Thanks

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted
       [not found]                                   ` <5285BBE2.7010001-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
@ 2013-11-15  8:37                                     ` Eric W. Biederman
  0 siblings, 0 replies; 22+ messages in thread
From: Eric W. Biederman @ 2013-11-15  8:37 UTC (permalink / raw)
  To: Gao feng
  Cc: Linux FS Devel, Linux Containers,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andy Lutomirski

Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> writes:

> Ok,I agree with you that we should make container security by default.
>
> What's your idea that introduces option MS_NOT_A_LOCK just like Andy's
> advisement?

It might be doable but it is unnecessary.

> In libvirt, host creates dev and devpts directories for container,then
> mount devpts, tmpfs on them and create device nodes inside these dirs
> for container. and then in container, these filesystems are moved to
> container's /dev/ /dev/pts directory. We really have no need to lock
> these mounts. they are just created for container.

If the global root creates the namespace and performs all of the mounts
it is unnecessary.

Now I believe you can create those directories for the most part as
non-root in libvirt and gain some interesting applications.

That said if you don't want locked mounts you just just be able to
create a temporary mount namespace as the global root, and do your prep
work.

Then create your unprivileged mount namespace and bind mount the
directories where you want them, and then pivot_root away the bits you
don't want.

There is already more mechanism than I like to deal with the mount
namespace I would really rather not invent/debug/support any more.

Eric

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2013-11-15  8:37 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-27 21:44 [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted Eric W. Biederman
     [not found] ` <878uzmhkqg.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2013-08-27 21:46   ` [REVIEW][PATCH 2/2] sysfs: Restrict mounting sysfs Eric W. Biederman
     [not found]     ` <874naahkng.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2013-08-28 19:00       ` Greg Kroah-Hartman
2013-09-23 10:33       ` James Hogan
     [not found]         ` <524018EA.9070202-1AXoQHu6uovQT0dZR+AlfA@public.gmane.org>
2013-09-23 21:41           ` [PATCH] sysfs: Allow mounting without CONFIG_NET Eric W. Biederman
     [not found]             ` <87ioxrrzb6.fsf_-_-HxuHnoDHeQZYhcs0q7wBk77fW72O3V7zAL8bYrjMMd8@public.gmane.org>
2013-09-24 11:25               ` James Hogan
2013-08-27 21:47   ` [REVIEW][PATCH 1/2] userns: Better restrictions on when proc and sysfs can be mounted Andy Lutomirski
     [not found]     ` <CALCETrWPDzuoaJp2ko5jAbwYUBqSdPjvO5uGo-gZVsS4Wm1PKQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-08-27 21:57       ` Eric W. Biederman
2013-09-01  4:45         ` Eric W. Biederman
     [not found]           ` <87eh99noa0.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2013-09-03 17:40             ` Andy Lutomirski
2013-11-02  6:06   ` Gao feng
     [not found]     ` <52749663.2000701-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-04  7:00       ` Janne Karhunen
     [not found]         ` <CAE=NcrY+CzX+H4XQTdGj7CSZ98a5T=bNgT6=jGZzcjyaHb-ttw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-11-09  5:22           ` Eric W. Biederman
2013-11-08  2:33       ` Gao feng
     [not found]         ` <527C4D88.10907-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-09  5:42           ` Eric W. Biederman
     [not found]             ` <87k3gigmgj.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2013-11-13  7:26               ` Gao feng
     [not found]                 ` <5283299B.8080702-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-14 11:10                   ` Gao feng
     [not found]                     ` <5284AF90.7060506-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-14 16:54                       ` Andy Lutomirski
2013-11-15  1:16                         ` Gao feng
     [not found]                           ` <528575EC.2030309-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-15  4:54                             ` Eric W. Biederman
     [not found]                               ` <87txfexo25.fsf-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2013-11-15  6:14                                 ` Gao feng
     [not found]                                   ` <5285BBE2.7010001-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
2013-11-15  8:37                                     ` Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).