From: Luis Chamberlain <mcgrof@kernel.org>
To: hch@infradead.org, djwong@kernel.org, dchinner@redhat.com,
kbusch@kernel.org, willy@infradead.org
Cc: hare@suse.de, ritesh.list@gmail.com, rgoldwyn@suse.com,
jack@suse.cz, patches@lists.linux.dev, linux-xfs@vger.kernel.org,
linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
p.raghav@samsung.com, da.gomez@samsung.com,
rohan.puri@samsung.com, rpuri.linux@gmail.com, mcgrof@kernel.org,
corbet@lwn.net, jake@lwn.net
Subject: [RFC 4/4] bdev: extend bdev inode with its own super_block
Date: Wed, 7 Jun 2023 20:24:04 -0700 [thread overview]
Message-ID: <20230608032404.1887046-5-mcgrof@kernel.org> (raw)
In-Reply-To: <20230608032404.1887046-1-mcgrof@kernel.org>
We currently share a single super_block for the block device cache;
each block device corresponds to one inode on that super_block. This
implies sharing a single set of aops, though, and in the near future
we want to be able to instead support using iomap on the super_block
for different block devices.
To allow more flexibility, use a super_block per block device, so
that block devices with pure-iomap requirements can eventually
co-exist with block devices which require buffer-heads.
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
block/bdev.c | 94 +++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 78 insertions(+), 16 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index 2b16afc2bd2a..3ab952a77a11 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -30,9 +30,14 @@
#include "../fs/internal.h"
#include "blk.h"
+static LIST_HEAD(bdev_inode_list);
+static DEFINE_MUTEX(bdev_inode_mutex);
+
struct bdev_inode {
struct block_device bdev;
struct inode vfs_inode;
+ struct vfsmount *bd_mnt;
+ struct list_head list;
};
static inline struct bdev_inode *BDEV_I(struct inode *inode)
@@ -321,10 +326,28 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static void bdev_remove_inode(struct bdev_inode *binode)
+{
+ struct bdev_inode *bdev_inode, *tmp;
+
+ kern_unmount(binode->bd_mnt);
+
+ mutex_lock(&bdev_inode_mutex);
+ list_for_each_entry_safe(bdev_inode, tmp, &bdev_inode_list, list) {
+ if (bdev_inode == binode) {
+ list_del_init(&bdev_inode->list);
+ break;
+ }
+ }
+ mutex_unlock(&bdev_inode_mutex);
+}
+
static void bdev_free_inode(struct inode *inode)
{
struct block_device *bdev = I_BDEV(inode);
+ bdev_remove_inode(BDEV_I(inode));
+
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
@@ -378,12 +401,9 @@ static struct file_system_type bd_type = {
.kill_sb = kill_anon_super,
};
-struct super_block *blockdev_superblock __read_mostly;
-
void __init bdev_cache_init(void)
{
int err;
- static struct vfsmount *bd_mnt;
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -392,20 +412,23 @@ void __init bdev_cache_init(void)
err = register_filesystem(&bd_type);
if (err)
panic("Cannot register bdev pseudo-fs");
- bd_mnt = kern_mount(&bd_type);
- if (IS_ERR(bd_mnt))
- panic("Cannot create bdev pseudo-fs");
- blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
+ struct vfsmount *bd_mnt;
struct block_device *bdev;
struct inode *inode;
- inode = new_inode(blockdev_superblock);
- if (!inode)
+ bd_mnt = vfs_kern_mount(&bd_type, SB_KERNMOUNT, bd_type.name, NULL);
+ if (IS_ERR(bd_mnt))
return NULL;
+
+ inode = new_inode(bd_mnt->mnt_sb);
+ if (!inode) {
+ kern_unmount(bd_mnt);
+ goto err_out;
+ }
inode->i_mode = S_IFBLK;
inode->i_rdev = 0;
#ifdef CONFIG_BUFFER_HEAD
@@ -426,12 +449,14 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
else
bdev->bd_has_submit_bio = false;
bdev->bd_stats = alloc_percpu(struct disk_stats);
- if (!bdev->bd_stats) {
- iput(inode);
- return NULL;
- }
+ if (!bdev->bd_stats)
+ goto err_out;
bdev->bd_disk = disk;
+ BDEV_I(inode)->bd_mnt = bd_mnt; /* For writeback */
return bdev;
+err_out:
+ iput(inode);
+ return NULL;
}
void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
@@ -444,13 +469,16 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
void bdev_add(struct block_device *bdev, dev_t dev)
{
+ struct inode *inode = bdev->bd_inode;
+
bdev->bd_dev = dev;
bdev->bd_inode->i_rdev = dev;
bdev->bd_inode->i_ino = dev;
insert_inode_hash(bdev->bd_inode);
+ list_add_tail(&BDEV_I(inode)->list, &bdev_inode_list);
}
-long nr_blockdev_pages(void)
+static long nr_blockdev_pages_sb(struct super_block *blockdev_superblock)
{
struct inode *inode;
long ret = 0;
@@ -463,6 +491,19 @@ long nr_blockdev_pages(void)
return ret;
}
+long nr_blockdev_pages(void)
+{
+ struct bdev_inode *bdev_inode;
+ long ret = 0;
+
+ mutex_lock(&bdev_inode_mutex);
+ list_for_each_entry(bdev_inode, &bdev_inode_list, list)
+ ret += nr_blockdev_pages_sb(bdev_inode->bd_mnt->mnt_sb);
+ mutex_unlock(&bdev_inode_mutex);
+
+ return ret;
+}
+
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -672,7 +713,18 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
static struct inode *blkdev_inode_lookup(dev_t dev)
{
- return ilookup(blockdev_superblock, dev);
+ struct bdev_inode *bdev_inode;
+ struct inode *inode = NULL;
+
+ mutex_lock(&bdev_inode_mutex);
+ list_for_each_entry(bdev_inode, &bdev_inode_list, list) {
+ inode = ilookup(bdev_inode->bd_mnt->mnt_sb, dev);
+ if (inode)
+ break;
+ }
+ mutex_unlock(&bdev_inode_mutex);
+
+ return inode;
}
struct block_device *blkdev_get_no_open(dev_t dev)
@@ -961,7 +1013,7 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
}
EXPORT_SYMBOL(__invalidate_device);
-void sync_bdevs(bool wait)
+static void sync_bdev_sb(struct super_block *blockdev_superblock, bool wait)
{
struct inode *inode, *old_inode = NULL;
@@ -1013,6 +1065,16 @@ void sync_bdevs(bool wait)
iput(old_inode);
}
+void sync_bdevs(bool wait)
+{
+ struct bdev_inode *bdev_inode;
+
+ mutex_lock(&bdev_inode_mutex);
+ list_for_each_entry(bdev_inode, &bdev_inode_list, list)
+ sync_bdev_sb(bdev_inode->bd_mnt->mnt_sb, wait);
+ mutex_unlock(&bdev_inode_mutex);
+}
+
/*
* Handle STATX_DIOALIGN for block devices.
*
--
2.39.2
next prev parent reply other threads:[~2023-06-08 3:24 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-08 3:24 [RFC 0/4] bdev: allow buffer-head & iomap aops to co-exist Luis Chamberlain
2023-06-08 3:24 ` [RFC 1/4] bdev: replace export of blockdev_superblock with BDEVFS_MAGIC Luis Chamberlain
2023-06-08 10:22 ` Jan Kara
2023-06-08 13:53 ` Christoph Hellwig
2023-06-08 3:24 ` [RFC 2/4] bdev: abstract inode lookup on blkdev_get_no_open() Luis Chamberlain
2023-06-08 3:24 ` [RFC 3/4] bdev: rename iomap aops Luis Chamberlain
2023-06-08 3:24 ` Luis Chamberlain [this message]
2023-06-08 13:37 ` [RFC 4/4] bdev: extend bdev inode with its own super_block Matthew Wilcox
2023-06-08 13:50 ` Christoph Hellwig
2023-06-08 17:45 ` Luis Chamberlain
2023-06-09 4:20 ` Christoph Hellwig
2023-06-09 9:17 ` Luis Chamberlain
2023-06-08 13:50 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230608032404.1887046-5-mcgrof@kernel.org \
--to=mcgrof@kernel.org \
--cc=corbet@lwn.net \
--cc=da.gomez@samsung.com \
--cc=dchinner@redhat.com \
--cc=djwong@kernel.org \
--cc=hare@suse.de \
--cc=hch@infradead.org \
--cc=jack@suse.cz \
--cc=jake@lwn.net \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=p.raghav@samsung.com \
--cc=patches@lists.linux.dev \
--cc=rgoldwyn@suse.com \
--cc=ritesh.list@gmail.com \
--cc=rohan.puri@samsung.com \
--cc=rpuri.linux@gmail.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).