All of lore.kernel.org
 help / color / mirror / Atom feed
From: Seth Forshee <seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
To: "Eric W. Biederman"
	<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>,
	Alexander Viro
	<viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org>,
	Greg Kroah-Hartman
	<gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>,
	Jeff Layton <jlayton-vpEMnDpepFuMZCB2o+C8xQ@public.gmane.org>,
	"J. Bruce Fields"
	<bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>,
	Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: linux-bcache-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Serge Hallyn
	<serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>,
	Seth Forshee
	<seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>,
	dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	Miklos Szeredi <mszeredi-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	linux-security-module-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	fuse-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org,
	Austin S Hemmelgarn
	<ahferroin7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	linux-mtd-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
	selinux-+05T5uksL2qpZYMLLGbcSA@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Pavel Tikhomirov
	<ptikhomirov-5HdwGun5lf+gSpxsJD1C4w@public.gmane.org>
Subject: [PATCH v4 03/21] fs: Allow sysfs and cgroupfs to share super blocks between user namespaces
Date: Tue, 26 Apr 2016 14:36:16 -0500	[thread overview]
Message-ID: <1461699396-33000-4-git-send-email-seth.forshee@canonical.com> (raw)
In-Reply-To: <1461699396-33000-1-git-send-email-seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Both of these filesystems already have use cases for mounting the
same super block from multiple user namespaces. For sysfs this
happens when using criu for snapshotting a container, where sysfs
is mounted in the container's network ns but the host's user ns.
The cgroup filesystem shares the same super block for all mounts
of the same hierarchy regardless of the namespace.

As a result, the restriction on mounting a super block from a
single user namespace creates regressions for existing uses of
these filesystems. For these specific filesystems this
restriction isn't really necessary since the backing store is
objects in kernel memory and thus the ids assigned from inodes
are not subject to translation relative to s_user_ns.

Add a new filesystem flag, FS_USERNS_SHARE_SB, which when set
causes sget_userns() to skip the check of s_user_ns. Set this
flag for the sysfs and cgroup filesystems to fix the
regressions.

Signed-off-by: Seth Forshee <seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
Acked-by: Serge Hallyn <serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA@public.gmane.org>
---
 fs/super.c         | 3 ++-
 fs/sysfs/mount.c   | 3 ++-
 include/linux/fs.h | 1 +
 kernel/cgroup.c    | 4 ++--
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 092a7828442e..ead156b44bf8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -472,7 +472,8 @@ retry:
 		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
 			if (!test(old, data))
 				continue;
-			if (user_ns != old->s_user_ns) {
+			if (!(type->fs_flags & FS_USERNS_SHARE_SB) &&
+			    user_ns != old->s_user_ns) {
 				spin_unlock(&sb_lock);
 				if (s) {
 					up_write(&s->s_umount);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f3db82071cfb..9555accd4322 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -59,7 +59,8 @@ static struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.mount		= sysfs_mount,
 	.kill_sb	= sysfs_kill_sb,
-	.fs_flags	= FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+	.fs_flags	= FS_USERNS_VISIBLE | FS_USERNS_MOUNT |
+			  FS_USERNS_SHARE_SB,
 };
 
 int __init sysfs_init(void)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be0f8023e28c..66a639ec1bc4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1988,6 +1988,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
 #define FS_USERNS_VISIBLE	32	/* FS must already be visible */
+#define FS_USERNS_SHARE_SB	64	/* Allow sharing sb between userns-es */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05c0b0f..9c9aa27e531a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2247,14 +2247,14 @@ static struct file_system_type cgroup_fs_type = {
 	.name = "cgroup",
 	.mount = cgroup_mount,
 	.kill_sb = cgroup_kill_sb,
-	.fs_flags = FS_USERNS_MOUNT,
+	.fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB,
 };
 
 static struct file_system_type cgroup2_fs_type = {
 	.name = "cgroup2",
 	.mount = cgroup_mount,
 	.kill_sb = cgroup_kill_sb,
-	.fs_flags = FS_USERNS_MOUNT,
+	.fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB,
 };
 
 static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-- 
2.7.4


------------------------------------------------------------------------------
Find and fix application performance issues faster with Applications Manager
Applications Manager provides deep performance insights into multiple tiers of
your business applications. It resolves application problems quickly and
reduces your MTTR. Get your free trial!
https://ad.doubleclick.net/ddm/clk/302982198;130105516;z
-- 
fuse-devel mailing list
To unsubscribe or subscribe, visit https://lists.sourceforge.net/lists/listinfo/fuse-devel

WARNING: multiple messages have this Message-ID (diff)
From: Seth Forshee <seth.forshee@canonical.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Jeff Layton <jlayton@poochiereds.net>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Tejun Heo <tj@kernel.org>, Li Zefan <lizefan@huawei.com>,
	Johannes Weiner <hannes@cmpxchg.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>,
	Richard Weinberger <richard.weinberger@gmail.com>,
	Austin S Hemmelgarn <ahferroin7@gmail.com>,
	Miklos Szeredi <mszeredi@redhat.com>,
	Pavel Tikhomirov <ptikhomirov@virtuozzo.com>,
	linux-kernel@vger.kernel.org, linux-bcache@vger.kernel.org,
	dm-devel@redhat.com, linux-raid@vger.kernel.org,
	linux-mtd@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	fuse-devel@lists.sourceforge.net,
	linux-security-module@vger.kernel.org, selinux@tycho.nsa.gov,
	cgroups@vger.kernel.org,
	Seth Forshee <seth.forshee@canonical.com>
Subject: [PATCH v4 03/21] fs: Allow sysfs and cgroupfs to share super blocks between user namespaces
Date: Tue, 26 Apr 2016 14:36:16 -0500	[thread overview]
Message-ID: <1461699396-33000-4-git-send-email-seth.forshee@canonical.com> (raw)
In-Reply-To: <1461699396-33000-1-git-send-email-seth.forshee@canonical.com>

Both of these filesystems already have use cases for mounting the
same super block from multiple user namespaces. For sysfs this
happens when using criu for snapshotting a container, where sysfs
is mounted in the container's network ns but the host's user ns.
The cgroup filesystem shares the same super block for all mounts
of the same hierarchy regardless of the namespace.

As a result, the restriction on mounting a super block from a
single user namespace creates regressions for existing uses of
these filesystems. For these specific filesystems this
restriction isn't really necessary since the backing store is
objects in kernel memory and thus the ids assigned from inodes
are not subject to translation relative to s_user_ns.

Add a new filesystem flag, FS_USERNS_SHARE_SB, which when set
causes sget_userns() to skip the check of s_user_ns. Set this
flag for the sysfs and cgroup filesystems to fix the
regressions.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
---
 fs/super.c         | 3 ++-
 fs/sysfs/mount.c   | 3 ++-
 include/linux/fs.h | 1 +
 kernel/cgroup.c    | 4 ++--
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 092a7828442e..ead156b44bf8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -472,7 +472,8 @@ retry:
 		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
 			if (!test(old, data))
 				continue;
-			if (user_ns != old->s_user_ns) {
+			if (!(type->fs_flags & FS_USERNS_SHARE_SB) &&
+			    user_ns != old->s_user_ns) {
 				spin_unlock(&sb_lock);
 				if (s) {
 					up_write(&s->s_umount);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f3db82071cfb..9555accd4322 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -59,7 +59,8 @@ static struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
 	.mount		= sysfs_mount,
 	.kill_sb	= sysfs_kill_sb,
-	.fs_flags	= FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+	.fs_flags	= FS_USERNS_VISIBLE | FS_USERNS_MOUNT |
+			  FS_USERNS_SHARE_SB,
 };
 
 int __init sysfs_init(void)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be0f8023e28c..66a639ec1bc4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1988,6 +1988,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
 #define FS_USERNS_VISIBLE	32	/* FS must already be visible */
+#define FS_USERNS_SHARE_SB	64	/* Allow sharing sb between userns-es */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05c0b0f..9c9aa27e531a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2247,14 +2247,14 @@ static struct file_system_type cgroup_fs_type = {
 	.name = "cgroup",
 	.mount = cgroup_mount,
 	.kill_sb = cgroup_kill_sb,
-	.fs_flags = FS_USERNS_MOUNT,
+	.fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB,
 };
 
 static struct file_system_type cgroup2_fs_type = {
 	.name = "cgroup2",
 	.mount = cgroup_mount,
 	.kill_sb = cgroup_kill_sb,
-	.fs_flags = FS_USERNS_MOUNT,
+	.fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB,
 };
 
 static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-- 
2.7.4

  parent reply	other threads:[~2016-04-26 19:36 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-26 19:36 [PATCH v4 00/21] Support fuse mounts in user namespaces Seth Forshee
2016-04-26 19:36 ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 01/21] fs: fix a posible leak of allocated superblock Seth Forshee
2016-04-26 19:36 ` [PATCH v4 04/21] block_dev: Support checking inode permissions in lookup_bdev() Seth Forshee
2016-04-26 19:36   ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 05/21] block_dev: Check permissions towards block device inode when mounting Seth Forshee
2016-04-26 19:36 ` [PATCH v4 06/21] fs: Treat foreign mounts as nosuid Seth Forshee
2016-04-26 19:36   ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 08/21] userns: Replace in_userns with current_in_userns Seth Forshee
2016-04-26 19:36   ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 09/21] Smack: Handle labels consistently in untrusted mounts Seth Forshee
2016-04-26 19:36 ` [PATCH v4 10/21] fs: Check for invalid i_uid in may_follow_link() Seth Forshee
2016-04-26 19:36   ` Seth Forshee
2016-05-24 15:55   ` Djalal Harouni
2016-04-26 19:36 ` [PATCH v4 11/21] cred: Reject inodes with invalid ids in set_create_file_as() Seth Forshee
2016-04-26 19:36   ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 15/21] fs: Don't remove suid for CAP_FSETID in s_user_ns Seth Forshee
2016-04-26 19:36   ` Seth Forshee
     [not found] ` <1461699396-33000-1-git-send-email-seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2016-04-26 19:36   ` [PATCH v4 02/21] fs: Remove check of s_user_ns for existing mounts in fs_fully_visible() Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` Seth Forshee [this message]
2016-04-26 19:36     ` [PATCH v4 03/21] fs: Allow sysfs and cgroupfs to share super blocks between user namespaces Seth Forshee
2016-04-26 19:36   ` [PATCH v4 07/21] selinux: Add support for unprivileged mounts from " Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` [PATCH v4 12/21] fs: Refuse uid/gid changes which don't map into s_user_ns Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` [PATCH v4 13/21] fs: Update posix_acl support to handle user namespace mounts Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` [PATCH v4 14/21] fs: Allow superblock owner to change ownership of inodes with unmappable ids Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` [PATCH v4 16/21] fs: Allow superblock owner to access do_remount_sb() Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36   ` [PATCH v4 17/21] capabilities: Allow privileged user in s_user_ns to set security.* xattrs Seth Forshee
2016-04-26 19:36     ` Seth Forshee
     [not found]     ` <1461699396-33000-18-git-send-email-seth.forshee-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2016-04-27  7:22       ` James Morris
2016-04-27  7:22         ` James Morris
2016-04-26 19:36   ` [PATCH v4 18/21] fuse: Add support for pid namespaces Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-07-20  2:44     ` Sheng Yang
     [not found]       ` <CA+2rt426_pshAauQizcxkfAq16vmEpB4sJ4genW_ucosH3j=zQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-07-20 12:52         ` Seth Forshee
2016-07-20 12:52           ` Seth Forshee
2016-07-20 22:28           ` Sheng Yang
2016-07-21  7:25           ` Miklos Szeredi
2016-07-21  7:25             ` Miklos Szeredi
2016-04-26 19:36   ` [PATCH v4 19/21] fuse: Support fuse filesystems outside of init_user_ns Seth Forshee
2016-04-26 19:36     ` Seth Forshee
2016-04-26 19:36 ` [PATCH v4 20/21] fuse: Restrict allow_other to the superblock's namespace or a descendant Seth Forshee
2016-04-26 19:36 ` [PATCH v4 21/21] fuse: Allow user namespace mounts Seth Forshee
  -- strict thread matches above, loose matches on Subject: below --
2016-04-26 19:30 [PATCH v4 00/21] Support fuse mounts in user namespaces Seth Forshee
     [not found] ` <1461699046-30485-4-git-send-email-seth.forshee@canonical.com>
2016-05-17 22:39   ` [PATCH v4 03/21] fs: Allow sysfs and cgroupfs to share super blocks between " Eric W. Biederman
2016-05-17 23:58     ` Seth Forshee
2016-05-18 15:45       ` Eric W. Biederman
2016-05-18 15:45         ` Eric W. Biederman
     [not found]         ` <8760ubs738.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>
2016-05-18 16:16           ` Seth Forshee
2016-05-18 16:16             ` Seth Forshee
2016-05-18 16:27             ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1461699396-33000-4-git-send-email-seth.forshee@canonical.com \
    --to=seth.forshee-z7wlfzj8ewms+fvcfc7uqw@public.gmane.org \
    --cc=ahferroin7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org \
    --cc=fuse-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    --cc=gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org \
    --cc=hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org \
    --cc=jlayton-vpEMnDpepFuMZCB2o+C8xQ@public.gmane.org \
    --cc=linux-bcache-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-mtd-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org \
    --cc=linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-security-module-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org \
    --cc=mszeredi-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=ptikhomirov-5HdwGun5lf+gSpxsJD1C4w@public.gmane.org \
    --cc=selinux-+05T5uksL2qpZYMLLGbcSA@public.gmane.org \
    --cc=serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org \
    --cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.