All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: <linux-fsdevel@vger.kernel.org>
Cc: <linux-block@vger.kernel.org>,
	Christoph Hellwig <hch@infradead.org>,
	Christian Brauner <brauner@kernel.org>,
	Jens Axboe <axboe@kernel.dk>, Kees Cook <keescook@google.com>,
	Ted Tso <tytso@mit.edu>, syzkaller <syzkaller@googlegroups.com>,
	Alexander Popov <alex.popov@linux.com>,
	Eric Biggers <ebiggers@google.com>, <linux-xfs@vger.kernel.org>,
	linux-btrfs@vger.kernel.org, Dmitry Vyukov <dvyukov@google.com>,
	Jan Kara <jack@suse.cz>
Subject: [PATCH 6/6] fs: Make bind mounts work with bdev_allow_write_mounted=n
Date: Tue,  4 Jul 2023 14:56:54 +0200	[thread overview]
Message-ID: <20230704125702.23180-6-jack@suse.cz> (raw)
In-Reply-To: <20230704122727.17096-1-jack@suse.cz>

When we don't allow opening of mounted block devices for writing, bind
mounting is broken because the bind mount tries to open the block device
before finding the superblock for it already exists. Reorganize the
mounting code to first look whether the superblock for a particular
device is already mounted and open the block device only if it is not.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/super.c | 188 +++++++++++++++++++++++++----------------------------
 1 file changed, 89 insertions(+), 99 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index ea135fece772..fdf1e286926e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1228,13 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = {
 
 static int set_bdev_super(struct super_block *s, void *data)
 {
-	s->s_bdev_handle = data;
-	s->s_bdev = s->s_bdev_handle->bdev;
-	s->s_dev = s->s_bdev->bd_dev;
-	s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
-
-	if (bdev_stable_writes(s->s_bdev))
-		s->s_iflags |= SB_I_STABLE_WRITES;
+	s->s_dev = *(dev_t *)data;
 	return 0;
 }
 
@@ -1246,7 +1240,53 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 {
 	return !(s->s_iflags & SB_I_RETIRED) &&
-		s->s_bdev == ((struct bdev_handle *)fc->sget_key)->bdev;
+	       s->s_dev == *(dev_t *)fc->sget_key;
+}
+
+static int setup_bdev_super(struct super_block *s, int sb_flags,
+			    struct fs_context *fc)
+{
+	struct bdev_handle *bdev_handle;
+
+	bdev_handle = blkdev_get_by_dev(s->s_dev, sb_open_mode(sb_flags),
+					s->s_type, &fs_holder_ops);
+	if (IS_ERR(bdev_handle)) {
+		if (fc)
+			errorf(fc, "%s: Can't open blockdev", fc->source);
+		return PTR_ERR(bdev_handle);
+	}
+	spin_lock(&sb_lock);
+	s->s_bdev_handle = bdev_handle;
+	s->s_bdev = bdev_handle->bdev;
+	s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
+
+	if (bdev_stable_writes(s->s_bdev))
+		s->s_iflags |= SB_I_STABLE_WRITES;
+	spin_unlock(&sb_lock);
+
+	/*
+	 * Until SB_BORN flag is set, there can be no active superblock
+ 	 * references and thus no filesystem freezing. get_active_super()
+	 * will just loop waiting for SB_BORN so even freeze_bdev() cannot
+	 * proceed. It is enough to check bdev was not frozen before we set
+	 * s_bdev.
+	 */
+	mutex_lock(&s->s_bdev->bd_fsfreeze_mutex);
+	if (s->s_bdev->bd_fsfreeze_count > 0) {
+		mutex_unlock(&s->s_bdev->bd_fsfreeze_mutex);
+		if (fc)
+			warnf(fc, "%pg: Can't mount, blockdev is frozen",
+			      s->s_bdev);
+		return -EBUSY;
+	}
+	mutex_unlock(&s->s_bdev->bd_fsfreeze_mutex);
+
+	snprintf(s->s_id, sizeof(s->s_id), "%pg", s->s_bdev);
+	shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
+				fc->fs_type->name, s->s_id);
+	sb_set_blocksize(s, block_size(s->s_bdev));
+
+	return 0;
 }
 
 /**
@@ -1258,75 +1298,51 @@ int get_tree_bdev(struct fs_context *fc,
 		int (*fill_super)(struct super_block *,
 				  struct fs_context *))
 {
-	struct bdev_handle *bdev_handle;
-	struct block_device *bdev;
+	dev_t dev;
 	struct super_block *s;
 	int error = 0;
 
 	if (!fc->source)
 		return invalf(fc, "No source specified");
 
-	bdev_handle = blkdev_get_by_path(fc->source,
-					 sb_open_mode(fc->sb_flags),
-					 fc->fs_type, &fs_holder_ops);
-	if (IS_ERR(bdev_handle)) {
-		errorf(fc, "%s: Can't open blockdev", fc->source);
-		return PTR_ERR(bdev_handle);
-	}
-	bdev = bdev_handle->bdev;
-
-	/* Once the superblock is inserted into the list by sget_fc(), s_umount
-	 * will protect the lockfs code from trying to start a snapshot while
-	 * we are mounting
-	 */
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-		blkdev_put(bdev_handle);
-		return -EBUSY;
+	error = lookup_bdev(fc->source, &dev);
+	if (error) {
+		errorf(fc, "%s: Can't lookup blockdev", fc->source);
+		return error;
 	}
 
 	fc->sb_flags |= SB_NOSEC;
-	fc->sget_key = bdev_handle;
+	fc->sget_key = &dev;
 	s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	if (IS_ERR(s)) {
-		blkdev_put(bdev_handle);
+	if (IS_ERR(s))
 		return PTR_ERR(s);
-	}
 
 	if (s->s_root) {
 		/* Don't summarily change the RO/RW state. */
 		if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
-			warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+			warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
 			deactivate_locked_super(s);
-			blkdev_put(bdev_handle);
 			return -EBUSY;
 		}
-
+	} else {
 		/*
-		 * s_umount nests inside open_mutex during
-		 * __invalidate_device().  blkdev_put() acquires open_mutex and
-		 * can't be called under s_umount.  Drop s_umount temporarily.
-		 * This is safe as we're holding an active reference.
+		 * We drop s_umount here because we need to lookup bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * invalidate_bdev()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
-		blkdev_put(bdev_handle);
+		error = setup_bdev_super(s, fc->sb_flags, fc);
 		down_write(&s->s_umount);
-	} else {
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-		shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-					fc->fs_type->name, s->s_id);
-		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, fc);
+		if (!error)
+			error = fill_super(s, fc);
 		if (error) {
 			deactivate_locked_super(s);
 			return error;
 		}
 
 		s->s_flags |= SB_ACTIVE;
-		bdev->bd_super = s;
+		s->s_bdev->bd_super = s;
 	}
 
 	BUG_ON(fc->root);
@@ -1337,81 +1353,53 @@ EXPORT_SYMBOL(get_tree_bdev);
 
 static int test_bdev_super(struct super_block *s, void *data)
 {
-	return !(s->s_iflags & SB_I_RETIRED) &&
-		s->s_bdev == ((struct bdev_handle *)data)->bdev;
+	return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 struct dentry *mount_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int))
 {
-	struct bdev_handle *bdev_handle;
-	struct block_device *bdev;
 	struct super_block *s;
 	int error = 0;
+	dev_t dev;
 
-	bdev_handle = blkdev_get_by_path(dev_name, sb_open_mode(flags),
-					 fs_type, &fs_holder_ops);
-	if (IS_ERR(bdev_handle))
-		return ERR_CAST(bdev_handle);
-	bdev = bdev_handle->bdev;
+	error = lookup_bdev(dev_name, &dev);
+	if (error)
+		return ERR_PTR(error);
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		error = -EBUSY;
-		goto error_bdev;
-	}
-	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
-		 bdev_handle);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+	flags |= SB_NOSEC;
+	s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
 	if (IS_ERR(s))
-		goto error_s;
+		return ERR_CAST(s);
 
 	if (s->s_root) {
 		if ((flags ^ s->s_flags) & SB_RDONLY) {
 			deactivate_locked_super(s);
-			error = -EBUSY;
-			goto error_bdev;
+			return ERR_PTR(-EBUSY);
 		}
-
+	} else {
 		/*
-		 * s_umount nests inside open_mutex during
-		 * __invalidate_device().  blkdev_put() acquires open_mutex and
-		 * can't be called under s_umount.  Drop s_umount temporarily.
-		 * This is safe as we're holding an active reference.
+		 * We drop s_umount here because we need to lookup bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * invalidate_bdev()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
-		blkdev_put(bdev_handle);
+		error = setup_bdev_super(s, flags, NULL);
 		down_write(&s->s_umount);
-	} else {
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-		shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-					fs_type->name, s->s_id);
-		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+		if (!error)
+			error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(s);
-			goto error;
+			return ERR_PTR(error);
 		}
 
 		s->s_flags |= SB_ACTIVE;
-		bdev->bd_super = s;
+		s->s_bdev->bd_super = s;
 	}
 
 	return dget(s->s_root);
-
-error_s:
-	error = PTR_ERR(s);
-error_bdev:
-	blkdev_put(bdev_handle);
-error:
-	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(mount_bdev);
 
@@ -1419,10 +1407,12 @@ void kill_block_super(struct super_block *sb)
 {
 	struct block_device *bdev = sb->s_bdev;
 
-	bdev->bd_super = NULL;
 	generic_shutdown_super(sb);
-	sync_blockdev(bdev);
-	blkdev_put(sb->s_bdev_handle);
+	if (bdev) {
+		bdev->bd_super = NULL;
+		sync_blockdev(bdev);
+		blkdev_put(sb->s_bdev_handle);
+	}
 }
 
 EXPORT_SYMBOL(kill_block_super);
-- 
2.35.3


  parent reply	other threads:[~2023-07-04 12:57 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-04 12:56 [PATCH RFC 0/6 v2] block: Add config option to not allow writing to mounted devices Jan Kara
2023-07-04 12:56 ` [PATCH 1/6] " Jan Kara
2023-07-04 15:56   ` Colin Walters
2023-07-04 16:52     ` Eric Biggers
2023-08-14 16:41       ` Jan Kara
2023-08-14 16:43     ` Jan Kara
2023-07-04 18:44   ` Eric Biggers
2023-07-04 20:55     ` Theodore Ts'o
2023-07-05 10:30     ` Jan Kara
2023-07-05 15:12   ` Darrick J. Wong
2023-08-22  5:35   ` Eric Biggers
2023-08-22 10:11     ` Jan Kara
2023-10-19  9:16       ` Aleksandr Nogikh
2023-10-24 11:10         ` Jan Kara
2023-10-27 12:06           ` Aleksandr Nogikh
2023-11-08 10:10           ` Jan Kara
2023-11-08 18:24             ` Aleksandr Nogikh
2023-07-04 12:56 ` [PATCH 2/6] fs: Block writes to mounted block devices Jan Kara
2023-07-04 12:56 ` [PATCH 3/6] xfs: Block writes to log device Jan Kara
2023-07-04 15:53   ` Darrick J. Wong
2023-07-05 10:31     ` Jan Kara
2023-07-04 12:56 ` [PATCH 4/6] ext4: Block writes to journal device Jan Kara
2023-07-04 12:56 ` [PATCH 5/6] btrfs: Block writes to seed devices Jan Kara
2023-07-12 14:33   ` David Sterba
2023-07-04 12:56 ` Jan Kara [this message]
2023-07-04 13:59   ` [PATCH 6/6] fs: Make bind mounts work with bdev_allow_write_mounted=n Christian Brauner
2023-07-05 13:00     ` Jan Kara
2023-07-05 13:46       ` Christian Brauner
2023-07-05 16:14         ` Jan Kara
2023-07-06 15:55   ` Christoph Hellwig
2023-07-06 16:12     ` Jan Kara
2023-07-07  7:39       ` Christian Brauner
2023-07-07 10:48         ` Jan Kara
2023-07-07 11:31         ` Christoph Hellwig
2023-07-07 12:28           ` Jan Kara
2023-07-07 11:30       ` Christoph Hellwig
2023-07-04 13:40 ` [PATCH RFC 0/6 v2] block: Add config option to not allow writing to mounted devices Christian Brauner
2023-07-05 12:27 ` Mike Fleetwood
2023-08-14 16:39   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230704125702.23180-6-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=alex.popov@linux.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=dvyukov@google.com \
    --cc=ebiggers@google.com \
    --cc=hch@infradead.org \
    --cc=keescook@google.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=syzkaller@googlegroups.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.