All of lore.kernel.org
 help / color / mirror / Atom feed
From: Valerie Aurora <vaurora@redhat.com>
To: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Miklos Szeredi <miklos@szeredi.hu>, Jan Blunck <jblunck@suse.de>,
	Christoph Hellwig <hch@infradead.org>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Valerie Aurora <vaurora@redhat.com>
Subject: [PATCH 21/38] union-mount: Support for mounting union mount file systems
Date: Fri, 25 Jun 2010 12:05:11 -0700	[thread overview]
Message-ID: <1277492728-11446-22-git-send-email-vaurora@redhat.com> (raw)
In-Reply-To: <1277492728-11446-1-git-send-email-vaurora@redhat.com>

Create and tear down union mount structures on mount.  Check
requirements for union mounts.  This version clones the read-only
mounts as one big tree and points to them from the superblock.

Thanks to Felix Fietkau <nbd@openwrt.org> for a bug fix.
---
 fs/namespace.c        |  245 ++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/super.c            |    1 +
 include/linux/fs.h    |    6 +
 include/linux/mount.h |    2 +
 4 files changed, 252 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 121a137..c310676 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@
 #include <asm/unistd.h>
 #include "pnode.h"
 #include "internal.h"
+#include "union.h"
 
 #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
 #define HASH_SIZE (1UL << HASH_SHIFT)
@@ -1051,6 +1052,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
 		propagate_umount(kill);
 
 	list_for_each_entry(p, kill, mnt_hash) {
+		d_free_unions(p->mnt_root);
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
@@ -1336,6 +1338,207 @@ static int invent_group_ids(struct vfsmount *mnt, bool recurse)
 	return 0;
 }
 
+/**
+ * check_mnt_union - mount-time checks for union mount
+ *
+ * @mntpnt: path of the mountpoint the new mount will be on
+ * @topmost_mnt: vfsmount of the new file system to be mounted
+ * @mnt_flags: mount flags for the new file system
+ *
+ * Mount-time check of upper and lower layer file systems to see if we
+ * can union mount one on the other.
+ *
+ * The rules:
+ *
+ * Lower layer(s) and submounts read-only: We can't deal with
+ * namespace changes in the lower layers of a union, so the lower
+ * layer must be read-only.  Note that we could possibly convert a
+ * read-write unioned mount into a read-only mount here.
+ *
+ * Lower layer(s) and submounts not shared: The lower layer(s) of a
+ * union mount must not have any changes to its namespace.  Therefore,
+ * it must not be part of any mount event propagation group - i.e.,
+ * shared or slave.
+ *
+ * Union only at roots of file systems: Only permit unioning of file
+ * systems at their root directories.  This allows us to mark entire
+ * mounts as unioned.  Otherwise we must slowly and expensively work
+ * our way up a path looking for a unioned directory before we know if
+ * a path is from a unioned lower layer.
+ *
+ * Topmost layer must be writable to support our readdir()
+ * solution of copying up all lower level entries to the
+ * topmost layer.
+ *
+ * Topmost file system must support whiteouts and fallthrus.
+ *
+ * Topmost file system can't be mounted elsewhere. XXX implement some
+ * kind of marker in the superblock so subsequent mounts are not
+ * possible.
+ *
+ */
+
+static int
+check_mnt_union(struct path *mntpnt, struct vfsmount *topmost_mnt, int mnt_flags)
+{
+	struct vfsmount *p, *lower_mnt = mntpnt->mnt;
+
+	if (!(mnt_flags & MNT_UNION))
+		return 0;
+
+#ifndef CONFIG_UNION_MOUNT
+	return -EINVAL;
+#endif
+	for (p = lower_mnt; p; p = next_mnt(p, lower_mnt)) {
+		if (!(p->mnt_sb->s_flags & MS_RDONLY))
+			return -EBUSY;
+		if (IS_MNT_SHARED(p) || IS_MNT_SLAVE(p))
+			return -EBUSY;
+	}
+
+	if (!IS_ROOT(mntpnt->dentry))
+		return -EINVAL;
+
+	if (mnt_flags & MNT_READONLY)
+		return -EROFS;
+
+	if (!(topmost_mnt->mnt_sb->s_flags & MS_WHITEOUT))
+		return -EINVAL;
+
+	/* XXX top level mount should only be mounted once */
+
+	return 0;
+}
+
+void put_union_sb(struct super_block *sb)
+{
+	struct vfsmount *p, *mnt;
+	LIST_HEAD(umount_list);
+
+	if (!sb->s_ro_union_mnts)
+		return;
+	mnt = sb->s_ro_union_mnts;
+	for (p = mnt; p; p = next_mnt(p, mnt))
+		dec_hard_readonly_users(p);
+	spin_lock(&vfsmount_lock);
+	umount_tree(mnt, 0, &umount_list);
+	spin_unlock(&vfsmount_lock);
+	release_mounts(&umount_list);
+}
+
+static void cleanup_mnt_union(struct vfsmount *topmost_mnt)
+{
+	d_free_unions(topmost_mnt->mnt_root);
+	put_union_sb(topmost_mnt->mnt_sb);
+}
+
+/*
+ * find_union_root - Find the "lowest" (union low) mount to be unioned
+ */
+
+static struct vfsmount *find_union_root(struct vfsmount *topmost_mnt, struct path *mntpnt)
+{
+	struct path this_layer = *mntpnt;
+	struct vfsmount *lowest_mnt = NULL;
+
+	while(check_mnt_union(&this_layer, topmost_mnt, MNT_UNION) == 0) {
+		lowest_mnt = this_layer.mnt;
+		this_layer.dentry = this_layer.mnt->mnt_mountpoint;
+		this_layer.mnt = this_layer.mnt->mnt_parent;
+	}
+	return lowest_mnt;
+}
+
+/*
+ * Build the union stack for the root dir.  Note that topmost_mnt is
+ * not connected to the mount tree yet and that the cloned tree is not
+ * either.
+ */
+
+static int build_root_union(struct vfsmount *topmost_mnt, struct vfsmount *clone_root)
+{
+	struct union_dir **next_ud;
+	struct path upper, lower;
+	struct vfsmount *p, *mnt;
+	int err = 0;
+
+	/*
+	 * Find the topmost read-only mount, starting from the root
+	 * of the cloned tree of read-only mounts. __lookup_mnt() and
+	 * friends don't work because the cloned tree is not mounted
+	 * anywhere.
+	 */
+	mnt = clone_root;
+	for (p = clone_root; p; p = next_mnt(p, clone_root)) {
+		if ((p->mnt_parent == mnt) &&
+		    (p->mnt_mountpoint == mnt->mnt_root))
+			mnt = p;
+	}
+
+	/* Build the root union stack */
+	upper.mnt = topmost_mnt;
+	upper.dentry = topmost_mnt->mnt_root;
+	next_ud = &upper.dentry->d_union_dir;
+
+	while (upper.mnt != clone_root) {
+		lower.mnt = mntget(mnt);
+		lower.dentry = dget(mnt->mnt_root);
+		err = union_add_dir(&upper, &lower, next_ud);
+		if (err)
+			goto out;
+		upper = lower;
+		next_ud = &lower.dentry->d_union_dir;
+		mnt = mnt->mnt_parent;
+	}
+out:
+	return err;
+}
+
+/**
+ * prepare_mnt_union - do setup necessary for a union mount
+ *
+ * @topmost_mnt: vfsmount of topmost layer
+ * @mntpnt: path of requested mountpoint
+ *
+ * We union every underlying file system that is mounted on the same
+ * mountpoint (well, pathname), read-only, and not shared.  We clone
+ * the entire underlying read-only mount tree and keep a pointer to it
+ * from the topmost file system's superblock.
+ *
+ * XXX - Maybe should take # of layers to go down as an argument. But
+ * how to pass this in through mount options? All solutions look ugly.
+ */
+
+static int prepare_mnt_union(struct vfsmount *topmost_mnt, struct path *mntpnt)
+{
+	struct super_block *sb = topmost_mnt->mnt_sb;
+	struct vfsmount *p, *clone_root;
+	int err;
+
+	clone_root = find_union_root(topmost_mnt, mntpnt);
+	if (!clone_root)
+		return 0; /* Nothing to union */
+
+	/* Clone the whole mount tree that we're going to union. */
+	err = -ENOMEM;
+	sb->s_ro_union_mnts = copy_tree(clone_root, clone_root->mnt_root,
+					CL_COPY_ALL | CL_PRIVATE);
+	if (!sb->s_ro_union_mnts)
+		goto out;
+
+	for (p = sb->s_ro_union_mnts; p; p = next_mnt(p, sb->s_ro_union_mnts))
+		inc_hard_readonly_users(p);
+
+	err = build_root_union(topmost_mnt, clone_root);
+	if (err)
+		goto out;
+
+	return 0;
+out:
+	cleanup_mnt_union(topmost_mnt);
+	return err;
+}
+
 /*
  *  @source_mnt : mount tree to be attached
  *  @nd         : place the mount tree @source_mnt is attached
@@ -1413,9 +1616,16 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 		if (err)
 			goto out;
 	}
+
+	if (!parent_path && IS_MNT_UNION(source_mnt)) {
+		err = prepare_mnt_union(source_mnt, path);
+		if (err)
+			goto out_cleanup_ids;
+	}
+
 	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
 	if (err)
-		goto out_cleanup_ids;
+		goto out_cleanup_union;
 
 	spin_lock(&vfsmount_lock);
 
@@ -1439,6 +1649,9 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
 	spin_unlock(&vfsmount_lock);
 	return 0;
 
+ out_cleanup_union:
+	if (IS_MNT_UNION(source_mnt))
+		cleanup_mnt_union(source_mnt);
  out_cleanup_ids:
 	if (IS_MNT_SHARED(dest_mnt))
 		cleanup_group_ids(source_mnt, NULL);
@@ -1492,6 +1705,17 @@ static int do_change_type(struct path *path, int flag)
 		return -EINVAL;
 
 	down_write(&namespace_sem);
+
+	/*
+	 * Mounts of file systems with read-only users can't deal with
+	 * mount/umount propagation events - it's the moral equivalent
+	 * of rm -rf dir/ or the like.
+	 */
+	if (sb_is_hard_readonly(mnt->mnt_sb)) {
+		err = -EROFS;
+		goto out_unlock;
+	}
+
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)
@@ -1529,6 +1753,9 @@ static int do_loopback(struct path *path, char *old_name,
 	err = -EINVAL;
 	if (IS_MNT_UNBINDABLE(old_path.mnt))
 		goto out;
+	/* Mount part of a union mount elsewhere? The mind boggles. */
+	if (IS_MNT_UNION(old_path.mnt))
+		goto out;
 
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
 		goto out;
@@ -1550,7 +1777,6 @@ static int do_loopback(struct path *path, char *old_name,
 		spin_unlock(&vfsmount_lock);
 		release_mounts(&umount_list);
 	}
-
 out:
 	up_write(&namespace_sem);
 	path_put(&old_path);
@@ -1591,6 +1817,17 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	if (!check_mnt(path->mnt))
 		return -EINVAL;
 
+	if (mnt_flags & MNT_UNION)
+		return -EINVAL;
+
+	if ((path->mnt->mnt_flags & MNT_UNION) &&
+	    !(mnt_flags & MNT_UNION))
+		return -EINVAL;
+
+	if ((path->mnt->mnt_flags & MNT_UNION) &&
+	    (mnt_flags & MNT_READONLY))
+		return -EINVAL;
+
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
 
@@ -1755,6 +1992,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
 		goto unlock;
 
+	err = check_mnt_union(path, newmnt, mnt_flags);
+	if (err)
+		goto unlock;
+
 	newmnt->mnt_flags = mnt_flags;
 	if ((err = graft_tree(newmnt, path)))
 		goto unlock;
diff --git a/fs/super.c b/fs/super.c
index 6add39b..2ade113 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -197,6 +197,7 @@ void deactivate_super(struct super_block *s)
 		down_write(&s->s_umount);
 		fs->kill_sb(s);
 		put_filesystem(fs);
+		put_union_sb(s);
 		put_super(s);
 	}
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 32e6988..8f79a90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1396,6 +1396,12 @@ struct super_block {
 	 */
 	int s_hard_readonly_users;
 
+	/*
+	 * If this is the topmost file system in a union mount, this
+	 * points to the root of the private cloned vfsmount tree of
+	 * the read-only mounts in this union.
+	 */
+	struct vfsmount *s_ro_union_mnts;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 0302703..17d3d27 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -136,4 +136,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);
 
 extern dev_t name_to_dev_t(char *name);
 
+extern void put_union_sb(struct super_block *sb);
+
 #endif /* _LINUX_MOUNT_H */
-- 
1.6.3.3


  parent reply	other threads:[~2010-06-25 19:07 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-25 19:04 [PATCH 00/38] Union mounts - multiple layers and submounts Valerie Aurora
2010-06-25 19:04 ` [PATCH 01/38] VFS: Comment follow_mount() and friends Valerie Aurora
2010-06-25 19:04 ` [PATCH 02/38] VFS: Make lookup_hash() return a struct path Valerie Aurora
2010-06-25 19:04 ` [PATCH 03/38] VFS: Add read-only users count to superblock Valerie Aurora
2010-06-25 19:04 ` [PATCH 04/38] autofs4: Save autofs trigger's vfsmount in super block info Valerie Aurora
2010-06-25 19:04   ` Valerie Aurora
2010-06-25 19:04 ` [PATCH 05/38] whiteout/NFSD: Don't return information about whiteouts to userspace Valerie Aurora
2010-06-25 19:04 ` [PATCH 06/38] whiteout: Add vfs_whiteout() and whiteout inode operation Valerie Aurora
2010-06-25 19:04 ` [PATCH 07/38] whiteout: Set S_OPAQUE inode flag when creating directories Valerie Aurora
2010-06-25 19:04 ` [PATCH 08/38] whiteout: Allow removal of a directory with whiteouts Valerie Aurora
2010-06-25 19:04 ` [PATCH 09/38] whiteout: tmpfs whiteout support Valerie Aurora
2010-06-25 19:04   ` Valerie Aurora
2010-06-25 19:05 ` [PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add_link() Valerie Aurora
2010-06-25 19:05 ` [PATCH 11/38] whiteout: ext2 whiteout support Valerie Aurora
2010-06-25 19:05 ` [PATCH 12/38] whiteout: jffs2 " Valerie Aurora
2010-06-25 19:05   ` Valerie Aurora
2010-06-25 19:05   ` Valerie Aurora
2010-06-25 19:05 ` [PATCH 13/38] fallthru: Basic fallthru definitions Valerie Aurora
2010-06-25 19:05 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
2010-06-25 19:05 ` [PATCH 15/38] fallthru: jffs2 " Valerie Aurora
2010-06-25 19:05   ` Valerie Aurora
2010-06-25 19:05   ` Valerie Aurora
2010-06-25 19:05 ` [PATCH 16/38] fallthru: tmpfs " Valerie Aurora
2010-06-25 19:05 ` [PATCH 17/38] union-mount: Union mounts documentation Valerie Aurora
2010-06-25 19:05 ` [PATCH 18/38] union-mount: Introduce MNT_UNION and MS_UNION flags Valerie Aurora
2010-06-25 19:05 ` [PATCH 19/38] union-mount: Introduce union_dir structure and basic operations Valerie Aurora
2010-06-25 19:05 ` [PATCH 20/38] union-mount: Free union dirs on removal from dcache Valerie Aurora
2010-06-25 19:05 ` Valerie Aurora [this message]
2010-06-25 19:05 ` [PATCH 22/38] union-mount: Implement union lookup Valerie Aurora
2010-06-25 19:05 ` [PATCH 23/38] union-mount: Call do_whiteout() on unlink and rmdir in unions Valerie Aurora
2010-06-25 19:05 ` [PATCH 24/38] union-mount: Copy up directory entries on first readdir() Valerie Aurora
2010-06-25 19:05 ` [PATCH 25/38] VFS: Split inode_permission() and create path_permission() Valerie Aurora
2010-06-25 19:05 ` [PATCH 26/38] VFS: Create user_path_nd() to lookup both parent and target Valerie Aurora
2010-06-25 19:05 ` [PATCH 27/38] union-mount: In-kernel file copyup routines Valerie Aurora
2010-06-25 19:05 ` [PATCH 28/38] union-mount: Implement union-aware access()/faccessat() Valerie Aurora
2010-06-25 19:05 ` [PATCH 29/38] union-mount: Implement union-aware link() Valerie Aurora
2010-06-25 19:05 ` [PATCH 30/38] union-mount: Implement union-aware rename() Valerie Aurora
2010-06-25 19:05 ` [PATCH 31/38] union-mount: Implement union-aware writable open() Valerie Aurora
2010-06-25 19:05 ` [PATCH 32/38] union-mount: Implement union-aware chown() Valerie Aurora
2010-06-25 19:05 ` [PATCH 33/38] union-mount: Implement union-aware truncate() Valerie Aurora
2010-06-25 19:05 ` [PATCH 34/38] union-mount: Implement union-aware chmod()/fchmodat() Valerie Aurora
2010-06-25 19:05 ` [PATCH 35/38] union-mount: Implement union-aware lchown() Valerie Aurora
2010-06-25 19:05 ` [PATCH 36/38] union-mount: Implement union-aware utimensat() Valerie Aurora
2010-06-25 19:05 ` [PATCH 37/38] union-mount: Implement union-aware setxattr() Valerie Aurora
2010-06-25 19:05 ` [PATCH 38/38] union-mount: Implement union-aware lsetxattr() Valerie Aurora
  -- strict thread matches above, loose matches on Subject: below --
2010-06-15 18:39 [PATCH 00/38] Union mounts - union stack as linked list Valerie Aurora
2010-06-15 18:39 ` [PATCH 21/38] union-mount: Support for mounting union mount file systems Valerie Aurora
2010-07-13  4:47   ` Ian Kent
2010-07-16 21:02     ` Valerie Aurora
2010-07-20  3:12       ` Ian Kent
2010-08-04 21:59         ` Valerie Aurora
2010-08-05 10:34           ` Miklos Szeredi
2010-08-06 16:33             ` Valerie Aurora
2010-07-16 21:05     ` Valerie Aurora
2010-08-04 14:55   ` Miklos Szeredi
2010-08-04 19:50     ` Valerie Aurora
2010-08-05  4:26       ` Valerie Aurora

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1277492728-11446-22-git-send-email-vaurora@redhat.com \
    --to=vaurora@redhat.com \
    --cc=hch@infradead.org \
    --cc=jblunck@suse.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.