linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Luis Chamberlain <mcgrof@kernel.org>
To: hch@infradead.org, djwong@kernel.org, dchinner@redhat.com,
	kbusch@kernel.org, willy@infradead.org
Cc: hare@suse.de, ritesh.list@gmail.com, rgoldwyn@suse.com,
	jack@suse.cz, patches@lists.linux.dev, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	p.raghav@samsung.com, da.gomez@samsung.com,
	rohan.puri@samsung.com, rpuri.linux@gmail.com, mcgrof@kernel.org,
	corbet@lwn.net, jake@lwn.net
Subject: [RFC 4/4] bdev: extend bdev inode with it's own super_block
Date: Wed,  7 Jun 2023 20:24:04 -0700	[thread overview]
Message-ID: <20230608032404.1887046-5-mcgrof@kernel.org> (raw)
In-Reply-To: <20230608032404.1887046-1-mcgrof@kernel.org>

We currently share a single super_block for the block device cache;
each block device corresponds to one inode on that super_block. This
implies sharing a single set of aops, though, and in the near future
we want to be able to instead support using iomap on the super_block
for different block devices.

To allow more flexibility, use a super_block per block device, so
that block devices with pure-iomap requirements can eventually
co-exist with block devices which require buffer-heads.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 block/bdev.c | 94 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 16 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 2b16afc2bd2a..3ab952a77a11 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -30,9 +30,14 @@
 #include "../fs/internal.h"
 #include "blk.h"
 
+static LIST_HEAD(bdev_inode_list);
+static DEFINE_MUTEX(bdev_inode_mutex);
+
 struct bdev_inode {
 	struct block_device bdev;
 	struct inode vfs_inode;
+	struct vfsmount *bd_mnt;	/* mount set up in bdev_alloc() */
+	struct list_head list;		/* link in bdev_inode_list */
 };
 
 static inline struct bdev_inode *BDEV_I(struct inode *inode)
@@ -321,10 +326,28 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
+/* Unlink @binode from bdev_inode_list, then drop its bd_mnt mount. */
+static void bdev_remove_inode(struct bdev_inode *binode)
+{
+	struct bdev_inode *pos;
+
+	mutex_lock(&bdev_inode_mutex);
+	list_for_each_entry(pos, &bdev_inode_list, list) {
+		if (pos != binode)
+			continue;
+		list_del_init(&pos->list);
+		break;
+	}
+	mutex_unlock(&bdev_inode_mutex);
+	kern_unmount(binode->bd_mnt);	/* no list walker can see it now */
+}
+
 static void bdev_free_inode(struct inode *inode)
 {
 	struct block_device *bdev = I_BDEV(inode);
 
+	bdev_remove_inode(BDEV_I(inode));
+
 	free_percpu(bdev->bd_stats);
 	kfree(bdev->bd_meta_info);
 
@@ -378,12 +401,9 @@ static struct file_system_type bd_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-struct super_block *blockdev_superblock __read_mostly;
-
 void __init bdev_cache_init(void)
 {
 	int err;
-	static struct vfsmount *bd_mnt;
 
 	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
 			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -392,20 +412,23 @@ void __init bdev_cache_init(void)
 	err = register_filesystem(&bd_type);
 	if (err)
 		panic("Cannot register bdev pseudo-fs");
-	bd_mnt = kern_mount(&bd_type);
-	if (IS_ERR(bd_mnt))
-		panic("Cannot create bdev pseudo-fs");
-	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 }
 
 struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 {
+	struct vfsmount *bd_mnt;
 	struct block_device *bdev;
 	struct inode *inode;
 
-	inode = new_inode(blockdev_superblock);
-	if (!inode)
+	bd_mnt = vfs_kern_mount(&bd_type, SB_KERNMOUNT, bd_type.name, NULL);
+	if (IS_ERR(bd_mnt))
 		return NULL;
+
+	inode = new_inode(bd_mnt->mnt_sb);
+	if (!inode) {
+		kern_unmount(bd_mnt);
+		goto err_out;
+	}
 	inode->i_mode = S_IFBLK;
 	inode->i_rdev = 0;
 #ifdef CONFIG_BUFFER_HEAD
@@ -426,12 +449,14 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	else
 		bdev->bd_has_submit_bio = false;
 	bdev->bd_stats = alloc_percpu(struct disk_stats);
-	if (!bdev->bd_stats) {
-		iput(inode);
-		return NULL;
-	}
+	if (!bdev->bd_stats)
+		goto err_out;
 	bdev->bd_disk = disk;
+	BDEV_I(inode)->bd_mnt = bd_mnt; /* For writeback */
 	return bdev;
+err_out:
+	iput(inode);
+	return NULL;
 }
 
 void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
@@ -444,13 +469,16 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 
 void bdev_add(struct block_device *bdev, dev_t dev)
 {
 	bdev->bd_dev = dev;
 	bdev->bd_inode->i_rdev = dev;
 	bdev->bd_inode->i_ino = dev;
 	insert_inode_hash(bdev->bd_inode);
+	mutex_lock(&bdev_inode_mutex);	/* list walkers hold this mutex */
+	list_add_tail(&BDEV_I(bdev->bd_inode)->list, &bdev_inode_list);
+	mutex_unlock(&bdev_inode_mutex);
 }
 
-long nr_blockdev_pages(void)
+static long nr_blockdev_pages_sb(struct super_block *blockdev_superblock)
 {
 	struct inode *inode;
 	long ret = 0;
@@ -463,6 +491,19 @@ long nr_blockdev_pages(void)
 	return ret;
 }
 
+/* Sum nr_blockdev_pages_sb() over every bdev on bdev_inode_list. */
+long nr_blockdev_pages(void)
+{
+	struct bdev_inode *pos;
+	long total = 0;
+
+	mutex_lock(&bdev_inode_mutex);
+	list_for_each_entry(pos, &bdev_inode_list, list)
+		total += nr_blockdev_pages_sb(pos->bd_mnt->mnt_sb);
+	mutex_unlock(&bdev_inode_mutex);
+	return total;
+}
+
 /**
  * bd_may_claim - test whether a block device can be claimed
  * @bdev: block device of interest
@@ -672,7 +713,18 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
 
 static struct inode *blkdev_inode_lookup(dev_t dev)
 {
-	return ilookup(blockdev_superblock, dev);
+	struct inode *found = NULL;
+	struct bdev_inode *pos;
+
+	/* One super_block per bdev: probe each until one holds @dev. */
+	mutex_lock(&bdev_inode_mutex);
+	list_for_each_entry(pos, &bdev_inode_list, list) {
+		found = ilookup(pos->bd_mnt->mnt_sb, dev);
+		if (found)
+			break;
+	}
+	mutex_unlock(&bdev_inode_mutex);
+	return found;
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
@@ -961,7 +1013,7 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 }
 EXPORT_SYMBOL(__invalidate_device);
 
-void sync_bdevs(bool wait)
+static void sync_bdev_sb(struct super_block *blockdev_superblock, bool wait)
 {
 	struct inode *inode, *old_inode = NULL;
 
@@ -1013,6 +1065,16 @@ void sync_bdevs(bool wait)
 	iput(old_inode);
 }
 
+void sync_bdevs(bool wait)
+{
+	struct bdev_inode *pos;
+
+	mutex_lock(&bdev_inode_mutex);	/* hold bdev_inode_list steady */
+	list_for_each_entry(pos, &bdev_inode_list, list)
+		sync_bdev_sb(pos->bd_mnt->mnt_sb, wait); /* one sb per bdev */
+	mutex_unlock(&bdev_inode_mutex);
+}
+
 /*
  * Handle STATX_DIOALIGN for block devices.
  *
-- 
2.39.2


  parent reply	other threads:[~2023-06-08  3:24 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-08  3:24 [RFC 0/4] bdev: allow buffer-head & iomap aops to co-exist Luis Chamberlain
2023-06-08  3:24 ` [RFC 1/4] bdev: replace export of blockdev_superblock with BDEVFS_MAGIC Luis Chamberlain
2023-06-08 10:22   ` Jan Kara
2023-06-08 13:53   ` Christoph Hellwig
2023-06-08  3:24 ` [RFC 2/4] bdev: abstract inode lookup on blkdev_get_no_open() Luis Chamberlain
2023-06-08  3:24 ` [RFC 3/4] bdev: rename iomap aops Luis Chamberlain
2023-06-08  3:24 ` Luis Chamberlain [this message]
2023-06-08 13:37   ` [RFC 4/4] bdev: extend bdev inode with it's own super_block Matthew Wilcox
2023-06-08 13:50     ` Christoph Hellwig
2023-06-08 17:45       ` Luis Chamberlain
2023-06-09  4:20         ` Christoph Hellwig
2023-06-09  9:17           ` Luis Chamberlain
2023-06-08 13:50   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230608032404.1887046-5-mcgrof@kernel.org \
    --to=mcgrof@kernel.org \
    --cc=corbet@lwn.net \
    --cc=da.gomez@samsung.com \
    --cc=dchinner@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hare@suse.de \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jake@lwn.net \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=patches@lists.linux.dev \
    --cc=rgoldwyn@suse.com \
    --cc=ritesh.list@gmail.com \
    --cc=rohan.puri@samsung.com \
    --cc=rpuri.linux@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).