All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Miklos Szeredi <miklos@szeredi.hu>
Cc: Matthew Wilcox <willy@infradead.org>,
	dhowells@redhat.com, Matthew Wilcox <willy@infradead.org>,
	Ian Kent <raven@themaw.net>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 1/3] vfs: Use an xarray in the mount namespace to handle /proc/mounts list
Date: Mon, 15 Mar 2021 12:07:47 +0000	[thread overview]
Message-ID: <161581006790.2850696.15507933486273306779.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <161581005972.2850696.12854461380574304411.stgit@warthog.procyon.org.uk>

Add an xarray to the mount namespace and use this to perform a mnt_id to
mount object mapping for the namespace.  Make use of xa_reserve() to
perform preallocation before taking the mount_lock.

This will allow the set of mount objects in a namespace to be iterated
using xarray iteration and without the need to insert and remove fake
mounts as bookmarks - which cause issues for other trawlers of the list.

As a bonus, if we want to allow it, lseek() can be used to start at a
particular mount - though there's no easy way to limit the return to just a
single entry or enforce a failure if that mount doesn't exist, but a later
one does.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Alexander Viro <viro@zeniv.linux.org.uk>
cc: Miklos Szeredi <miklos@szeredi.hu>
cc: Matthew Wilcox <willy@infradead.org>
---

 fs/mount.h     |    2 +
 fs/namespace.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 74 insertions(+), 9 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 0b6e08cf8afb..455f4d293a65 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,6 +4,7 @@
 #include <linux/poll.h>
 #include <linux/ns_common.h>
 #include <linux/fs_pin.h>
+#include <linux/xarray.h>
 
 struct mnt_namespace {
 	struct ns_common	ns;
@@ -14,6 +15,7 @@ struct mnt_namespace {
 	 * - taking namespace_sem for read AND taking .ns_lock.
 	 */
 	struct list_head	list;
+	struct xarray		mounts_by_id; /* List of mounts by mnt_id */
 	spinlock_t		ns_lock;
 	struct user_namespace	*user_ns;
 	struct ucounts		*ucounts;
diff --git a/fs/namespace.c b/fs/namespace.c
index 56bb5a5fdc0d..5c9bcaeac4de 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -901,6 +901,57 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
 	mnt_add_count(old_parent, -1);
 }
 
+/*
+ * Reserve slots in the mnt_id-to-mount mapping in a namespace.  This gets the
+ * memory allocation done upfront.
+ */
+static int reserve_mnt_id_one(struct mount *mnt, struct mnt_namespace *ns)
+{
+	struct mount *m;
+	int ret;
+
+	ret = xa_reserve(&ns->mounts_by_id, mnt->mnt_id, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	list_for_each_entry(m, &mnt->mnt_list, mnt_list) {
+		ret = xa_reserve(&ns->mounts_by_id, m->mnt_id, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int reserve_mnt_id_list(struct hlist_head *tree_list)
+{
+	struct mount *child;
+	int ret;
+
+	hlist_for_each_entry(child, tree_list, mnt_hash) {
+		ret = reserve_mnt_id_one(child, child->mnt_parent->mnt_ns);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static void add_mnt_to_ns(struct mount *m, struct mnt_namespace *ns)
+{
+	void *x;
+
+	m->mnt_ns = ns;
+	x = xa_store(&ns->mounts_by_id, m->mnt_id, m, GFP_ATOMIC);
+	WARN(xa_err(x), "Couldn't store mnt_id %x\n", m->mnt_id);
+}
+
+static void remove_mnt_from_ns(struct mount *mnt)
+{
+	if (mnt->mnt_ns && mnt->mnt_ns != MNT_NS_INTERNAL)
+		xa_erase(&mnt->mnt_ns->mounts_by_id, mnt->mnt_id);
+	mnt->mnt_ns = NULL;
+}
+
 /*
  * vfsmount lock must be held for write
  */
@@ -914,8 +965,9 @@ static void commit_tree(struct mount *mnt)
 	BUG_ON(parent == mnt);
 
 	list_add_tail(&head, &mnt->mnt_list);
-	list_for_each_entry(m, &head, mnt_list)
-		m->mnt_ns = n;
+	list_for_each_entry(m, &head, mnt_list) {
+		add_mnt_to_ns(m, n);
+	}
 
 	list_splice(&head, n->list.prev);
 
@@ -1529,7 +1581,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 			ns->mounts--;
 			__touch_mnt_namespace(ns);
 		}
-		p->mnt_ns = NULL;
+		remove_mnt_from_ns(p);
 		if (how & UMOUNT_SYNC)
 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 
@@ -2144,6 +2196,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		err = count_mounts(ns, source_mnt);
 		if (err)
 			goto out;
+
+		/* Reserve id-to-mount mapping slots in the namespace we're
+		 * going to use.
+		 */
+		err = reserve_mnt_id_one(source_mnt, dest_mnt->mnt_ns);
+		if (err)
+			goto out;
 	}
 
 	if (IS_MNT_SHARED(dest_mnt)) {
@@ -2151,6 +2210,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		if (err)
 			goto out;
 		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+		if (!err && !moving)
+			err = reserve_mnt_id_list(&tree_list);
 		lock_mount_hash();
 		if (err)
 			goto out_cleanup_ids;
@@ -3260,6 +3321,7 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
+	WARN_ON(!xa_empty(&ns->mounts_by_id));
 	if (!is_anon_ns(ns))
 		ns_free_inum(&ns->ns);
 	dec_mnt_namespaces(ns->ucounts);
@@ -3306,6 +3368,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	spin_lock_init(&new_ns->ns_lock);
+	xa_init(&new_ns->mounts_by_id);
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
 	return new_ns;
@@ -3362,7 +3425,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	p = old;
 	q = new;
 	while (p) {
-		q->mnt_ns = new_ns;
+		add_mnt_to_ns(q, new_ns);
 		new_ns->mounts++;
 		if (new_fs) {
 			if (&p->mnt == new_fs->root.mnt) {
@@ -3404,7 +3467,7 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name)
 		mntput(m);
 		return ERR_CAST(ns);
 	}
-	mnt->mnt_ns = ns;
+	add_mnt_to_ns(mnt, ns);
 	ns->root = mnt;
 	ns->mounts++;
 	list_add(&mnt->mnt_list, &ns->list);
@@ -3583,7 +3646,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
 		goto err_path;
 	}
 	mnt = real_mount(newmount.mnt);
-	mnt->mnt_ns = ns;
+	add_mnt_to_ns(mnt, ns);
 	ns->root = mnt;
 	ns->mounts = 1;
 	list_add(&mnt->mnt_list, &ns->list);
@@ -4193,7 +4256,7 @@ static void __init init_mount_tree(void)
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
 	m = real_mount(mnt);
-	m->mnt_ns = ns;
+	add_mnt_to_ns(m, ns);
 	ns->root = m;
 	ns->mounts = 1;
 	list_add(&m->mnt_list, &ns->list);
@@ -4270,7 +4333,7 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		real_mount(mnt)->mnt_ns = NULL;
+		remove_mnt_from_ns(real_mount(mnt));
 		synchronize_rcu();	/* yecchhh... */
 		mntput(mnt);
 	}
@@ -4283,7 +4346,7 @@ void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
 
 	for (i = 0; i < num; i++)
 		if (mnt[i])
-			real_mount(mnt[i])->mnt_ns = NULL;
+			remove_mnt_from_ns(real_mount(mnt[i]));
 	synchronize_rcu_expedited();
 	for (i = 0; i < num; i++)
 		mntput(mnt[i]);



  reply	other threads:[~2021-03-15 12:08 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-15 12:07 [RFC][PATCH 0/3] vfs: Use an xarray instead of inserted bookmarks to scan mount list David Howells
2021-03-15 12:07 ` David Howells [this message]
2021-03-15 12:07 ` [PATCH 2/3] vfs: Use the mounts_to_id array to do /proc/mounts and co David Howells
2021-03-15 12:54   ` Matthew Wilcox
2021-03-15 12:08 ` [PATCH 3/3] vfs: Remove mount list trawling cursor stuff David Howells
2021-03-15 12:46 ` [RFC][PATCH 0/3] vfs: Use an xarray instead of inserted bookmarks to scan mount list Matthew Wilcox
2021-03-15 13:14 ` Miklos Szeredi
2021-03-15 13:17   ` Matthew Wilcox
2021-03-15 13:41 ` David Howells
2021-03-15 14:22   ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=161581006790.2850696.15507933486273306779.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=raven@themaw.net \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.