* move bd_mutex to the gendisk (resend)
@ 2021-05-12 6:18 Christoph Hellwig
2021-05-12 6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
` (7 more replies)
0 siblings, 8 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Hi all,
this series first cleans up gendisk allocation in the md driver to remove
the ERESTARTSYS hack in blkdev_get, then further refactors blkdev_get
and then finally moves bd_mutex into the gendisk as having separate locks
for the whole device vs partitions just complicates locking in places that
add and remove partitions a lot.
Diffstat:
Documentation/filesystems/locking.rst | 2
block/genhd.c | 59 +++------
block/ioctl.c | 2
block/partitions/core.c | 45 +++----
drivers/block/loop.c | 14 +-
drivers/block/xen-blkfront.c | 8 -
drivers/block/zram/zram_drv.c | 18 +--
drivers/block/zram/zram_drv.h | 2
drivers/md/md.h | 6 -
drivers/s390/block/dasd_genhd.c | 8 -
drivers/scsi/sd.c | 4
fs/block_dev.c | 204 ++++++++++++++++------------------
fs/btrfs/volumes.c | 2
fs/super.c | 8 -
include/linux/blk_types.h | 4
include/linux/genhd.h | 6 -
init/do_mounts.c | 10 -
17 files changed, 184 insertions(+), 218 deletions(-)
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 1/8] block: split __blkdev_get
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-19 9:21 ` Ming Lei
2021-05-12 6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
` (6 subsequent siblings)
7 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Split __blkdev_get into one helper for the whole device, and one for
opening partitions. This removes the (bounded) recursion when opening
a partition.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/block_dev.c | 115 +++++++++++++++++++++++--------------------------
1 file changed, 55 insertions(+), 60 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b8abccd03e5d..d053510d2f6a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1287,76 +1287,68 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
*/
EXPORT_SYMBOL_GPL(bdev_disk_changed);
-/*
- * bd_mutex locking:
- *
- * mutex_lock(part->bd_mutex)
- * mutex_lock_nested(whole->bd_mutex, 1)
- */
-static int __blkdev_get(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
- if (!bdev->bd_openers) {
- if (!bdev_is_partition(bdev)) {
- ret = 0;
- if (disk->fops->open)
- ret = disk->fops->open(bdev, mode);
-
- if (!ret)
- set_init_blocksize(bdev);
-
- /*
- * If the device is invalidated, rescan partition
- * if open succeeded or failed with -ENOMEDIUM.
- * The latter is necessary to prevent ghost
- * partitions on a removed medium.
- */
- if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
- (!ret || ret == -ENOMEDIUM))
- bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-
- if (ret)
- return ret;
- } else {
- struct block_device *whole = bdgrab(disk->part0);
-
- mutex_lock_nested(&whole->bd_mutex, 1);
- ret = __blkdev_get(whole, mode);
- if (ret) {
- mutex_unlock(&whole->bd_mutex);
- bdput(whole);
- return ret;
- }
- whole->bd_part_count++;
- mutex_unlock(&whole->bd_mutex);
-
- if (!(disk->flags & GENHD_FL_UP) ||
- !bdev_nr_sectors(bdev)) {
- __blkdev_put(whole, mode, 1);
- bdput(whole);
- return -ENXIO;
- }
- set_init_blocksize(bdev);
+ if (disk->fops->open) {
+ ret = disk->fops->open(bdev, mode);
+ if (ret) {
+ /* avoid ghost partitions on a removed medium */
+ if (ret == -ENOMEDIUM &&
+ test_bit(GD_NEED_PART_SCAN, &disk->state))
+ bdev_disk_changed(bdev, true);
+ return ret;
}
+ }
+ if (!bdev->bd_openers) {
+ set_init_blocksize(bdev);
if (bdev->bd_bdi == &noop_backing_dev_info)
bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
- } else {
- if (!bdev_is_partition(bdev)) {
- if (bdev->bd_disk->fops->open)
- ret = bdev->bd_disk->fops->open(bdev, mode);
- /* the same as first opener case, read comment there */
- if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
- (!ret || ret == -ENOMEDIUM))
- bdev_disk_changed(bdev, ret == -ENOMEDIUM);
- if (ret)
- return ret;
- }
}
+ if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+ bdev_disk_changed(bdev, false);
bdev->bd_openers++;
+ return 0;;
+}
+
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
+{
+ struct gendisk *disk = part->bd_disk;
+ struct block_device *whole;
+ int ret;
+
+ if (part->bd_openers)
+ goto done;
+
+ whole = bdgrab(disk->part0);
+ mutex_lock_nested(&whole->bd_mutex, 1);
+ ret = blkdev_get_whole(whole, mode);
+ if (ret) {
+ mutex_unlock(&whole->bd_mutex);
+ goto out_put_whole;
+ }
+ whole->bd_part_count++;
+ mutex_unlock(&whole->bd_mutex);
+
+ ret = -ENXIO;
+ if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
+ goto out_blkdev_put;
+
+ set_init_blocksize(part);
+ if (part->bd_bdi == &noop_backing_dev_info)
+ part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
+done:
+ part->bd_openers++;
return 0;
+
+out_blkdev_put:
+ __blkdev_put(whole, mode, 1);
+out_put_whole:
+ bdput(whole);
+ return ret;
}
struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1450,7 +1442,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
disk_block_events(disk);
mutex_lock(&bdev->bd_mutex);
- ret =__blkdev_get(bdev, mode);
+ if (bdev_is_partition(bdev))
+ ret = blkdev_get_part(bdev, mode);
+ else
+ ret = blkdev_get_whole(bdev, mode);
if (ret)
goto abort_claiming;
if (mode & FMODE_EXCL) {
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
2021-05-12 6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-19 9:32 ` Ming Lei
2021-05-12 6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
` (5 subsequent siblings)
7 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Do the early unlocked syncing even earlier to move more code out of
the recursive path.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/block_dev.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d053510d2f6a..95fde785dae7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1553,16 +1553,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
- /*
- * Sync early if it looks like we're the last one. If someone else
- * opens the block device between now and the decrement of bd_openers
- * then we did a sync that we didn't need to, but that's not the end
- * of the world and we want to avoid long (could be several minute)
- * syncs while holding the mutex.
- */
- if (bdev->bd_openers == 1)
- sync_blockdev(bdev);
-
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
@@ -1589,6 +1579,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minute)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/8] block: move bd_mutex to struct gendisk
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
2021-05-12 6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
2021-05-12 6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
` (4 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Replace the per-block device bd_mutex with a per-gendisk open_mutex,
thus simplifying locking wherever we deal with partitions.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
Documentation/filesystems/locking.rst | 2 +-
block/genhd.c | 7 ++---
block/partitions/core.c | 24 ++++++++---------
drivers/block/loop.c | 14 +++++-----
drivers/block/xen-blkfront.c | 8 +++---
drivers/block/zram/zram_drv.c | 18 ++++++-------
drivers/block/zram/zram_drv.h | 2 +-
drivers/md/md.h | 6 ++---
drivers/s390/block/dasd_genhd.c | 8 +++---
drivers/scsi/sd.c | 4 +--
fs/block_dev.c | 37 +++++++++++----------------
fs/btrfs/volumes.c | 2 +-
fs/super.c | 8 +++---
include/linux/blk_types.h | 1 -
include/linux/genhd.h | 3 +++
15 files changed, 68 insertions(+), 76 deletions(-)
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 1e894480115b..2183fd8cc350 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -480,7 +480,7 @@ prototypes::
locking rules:
======================= ===================
-ops bd_mutex
+ops open_mutex
======================= ===================
open: yes
release: yes
diff --git a/block/genhd.c b/block/genhd.c
index 39ca97b0edc6..8b88e99f6675 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -615,9 +615,9 @@ void del_gendisk(struct gendisk *disk)
*/
down_write(&bdev_lookup_sem);
- mutex_lock(&disk->part0->bd_mutex);
+ mutex_lock(&disk->open_mutex);
blk_drop_partitions(disk);
- mutex_unlock(&disk->part0->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
fsync_bdev(disk->part0);
__invalidate_device(disk->part0, true);
@@ -1304,6 +1304,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
goto out_free_disk;
disk->node_id = node_id;
+ mutex_init(&disk->open_mutex);
xa_init(&disk->part_tbl);
if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
@@ -1521,7 +1522,7 @@ void disk_unblock_events(struct gendisk *disk)
* doesn't clear the events from @disk->ev.
*
* CONTEXT:
- * If @mask is non-zero must be called with bdev->bd_mutex held.
+ * If @mask is non-zero must be called with disk->open_mutex held.
*/
void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
diff --git a/block/partitions/core.c b/block/partitions/core.c
index dc60ecf46fe6..0d33f55a7d78 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -282,7 +282,7 @@ struct device_type part_type = {
};
/*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
static void delete_partition(struct block_device *part)
@@ -311,7 +311,7 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
/*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
* after all disk users are gone.
*/
static struct block_device *add_partition(struct gendisk *disk, int partno,
@@ -450,15 +450,15 @@ int bdev_add_partition(struct block_device *bdev, int partno,
{
struct block_device *part;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
part = add_partition(bdev->bd_disk, partno, start, length,
ADDPART_FLAG_NONE, NULL);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return PTR_ERR_OR_ZERO(part);
}
@@ -471,8 +471,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
if (!part)
return -ENXIO;
- mutex_lock(&part->bd_mutex);
- mutex_lock_nested(&bdev->bd_mutex, 1);
+ mutex_lock(&bdev->bd_disk->open_mutex);
ret = -EBUSY;
if (part->bd_openers)
@@ -481,8 +480,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
delete_partition(part);
ret = 0;
out_unlock:
- mutex_unlock(&bdev->bd_mutex);
- mutex_unlock(&part->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(part);
return ret;
}
@@ -497,8 +495,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
if (!part)
return -ENXIO;
- mutex_lock(&part->bd_mutex);
- mutex_lock_nested(&bdev->bd_mutex, 1);
+ mutex_lock(&bdev->bd_disk->open_mutex);
ret = -EINVAL;
if (start != part->bd_start_sect)
goto out_unlock;
@@ -511,8 +508,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
ret = 0;
out_unlock:
- mutex_unlock(&part->bd_mutex);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(part);
return ret;
}
@@ -538,7 +534,7 @@ void blk_drop_partitions(struct gendisk *disk)
struct block_device *part;
unsigned long idx;
- lockdep_assert_held(&disk->part0->bd_mutex);
+ lockdep_assert_held(&disk->open_mutex);
xa_for_each_start(&disk->part_tbl, idx, part, 1) {
if (!bdgrab(part))
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..95c570f5923f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -652,9 +652,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
rc = bdev_disk_changed(bdev, false);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -747,7 +747,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
mutex_unlock(&lo->lo_mutex);
/*
* We must drop file reference outside of lo_mutex as dropping
- * the file ref can take bd_mutex which creates circular locking
+ * the file ref can take open_mutex which creates circular locking
* dependency.
*/
fput(old_file);
@@ -1260,7 +1260,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
mutex_unlock(&lo->lo_mutex);
if (partscan) {
/*
- * bd_mutex has been held already in release path, so don't
+ * open_mutex has been held already in release path, so don't
* acquire it if this function is called in such case.
*
* If the reread partition isn't from release path, lo_refcnt
@@ -1268,10 +1268,10 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
* current holder is released.
*/
if (!release)
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
err = bdev_disk_changed(bdev, false);
if (!release)
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);
@@ -1298,7 +1298,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
/*
* Need not hold lo_mutex to fput backing file. Calling fput holding
* lo_mutex triggers a circular lock dependency possibility warning as
- * fput can take bd_mutex which is usually taken before lo_mutex.
+ * fput can take open_mutex which is usually taken before lo_mutex.
*/
if (filp)
fput(filp);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 10df39a8b18d..f2c1aedcdf5a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2163,7 +2163,7 @@ static void blkfront_closing(struct blkfront_info *info)
return;
}
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers) {
xenbus_dev_error(xbdev, -EBUSY,
@@ -2174,7 +2174,7 @@ static void blkfront_closing(struct blkfront_info *info)
xenbus_frontend_closed(xbdev);
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
bdput(bdev);
}
@@ -2531,7 +2531,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
* isn't closed yet, we let release take care of it.
*/
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&disk->open_mutex);
info = disk->private_data;
dev_warn(disk_to_dev(disk),
@@ -2546,7 +2546,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
mutex_unlock(&blkfront_mutex);
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
bdput(bdev);
return 0;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cf8deecc39ef..b3bf544493d3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev,
zram = dev_to_zram(dev);
bdev = zram->disk->part0;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
/* Do not reset an active device or claimed device */
if (bdev->bd_openers || zram->claim) {
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
/* Make sure all the pending I/O are finished */
fsync_bdev(bdev);
zram_reset_device(zram);
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
zram->claim = false;
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return len;
}
@@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
int ret = 0;
struct zram *zram;
- WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+ WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
zram = bdev->bd_disk->private_data;
/* zram was claimed to reset so open request fails */
@@ -1982,14 +1982,14 @@ static int zram_remove(struct zram *zram)
{
struct block_device *bdev = zram->disk->part0;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers || zram->claim) {
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return -EBUSY;
}
zram->claim = true;
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
zram_debugfs_unregister(zram);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 419a7e8281ee..74c411911b6e 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -112,7 +112,7 @@ struct zram {
/*
* zram is claimed so open request will be failed
*/
- bool claim; /* Protected by bdev->bd_mutex */
+ bool claim; /* Protected by disk->open_mutex */
struct file *backing_dev;
#ifdef CONFIG_ZRAM_WRITEBACK
spinlock_t wb_limit_lock;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fb7eab58cfd5..a88086d4110c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -395,10 +395,10 @@ struct mddev {
* that we are never stopping an array while it is open.
* 'reconfig_mutex' protects all other reconfiguration.
* These locks are separate due to conflicting interactions
- * with bdev->bd_mutex.
+ * with disk->open_mutex.
* Lock ordering is:
- * reconfig_mutex -> bd_mutex
- * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
+ * reconfig_mutex -> disk->open_mutex
+ * disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/
struct mutex open_mutex;
struct mutex reconfig_mutex;
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 8d6587ec73e2..bf2082d461c7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -109,9 +109,9 @@ int dasd_scan_partitions(struct dasd_block *block)
return -ENODEV;
}
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&block->gdp->open_mutex);
rc = bdev_disk_changed(bdev, false);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&block->gdp->open_mutex);
if (rc)
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, rc %d", rc);
@@ -145,9 +145,9 @@ void dasd_destroy_partitions(struct dasd_block *block)
bdev = block->bdev;
block->bdev = NULL;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
bdev_disk_changed(bdev, true);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
blkdev_put(bdev, FMODE_READ);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cb3c37d1e009..d3ff723af879 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1400,7 +1400,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
* In the latter case @inode and @filp carry an abridged amount
* of information as noted above.
*
- * Locking: called with bdev->bd_mutex held.
+ * Locking: called with bdev->bd_disk->open_mutex held.
**/
static int sd_open(struct block_device *bdev, fmode_t mode)
{
@@ -1476,7 +1476,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
* Note: may block (uninterruptible) if error recovery is underway
* on this disk.
*
- * Locking: called with bdev->bd_mutex held.
+ * Locking: called with bdev->bd_disk->open_mutex held.
**/
static void sd_release(struct gendisk *disk, fmode_t mode)
{
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95fde785dae7..3597532cc081 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
bdev = I_BDEV(inode);
- mutex_init(&bdev->bd_mutex);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
bdev->bd_disk = disk;
@@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
struct bd_holder_disk *holder;
int ret = 0;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
WARN_ON_ONCE(!bdev->bd_holder);
@@ -1199,7 +1198,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
out_free:
kfree(holder);
out_unlock:
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
struct bd_holder_disk *holder;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
holder = bd_find_holder_disk(bdev, disk);
@@ -1230,7 +1229,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
kfree(holder);
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
@@ -1242,7 +1241,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
- lockdep_assert_held(&bdev->bd_mutex);
+ lockdep_assert_held(&disk->open_mutex);
rescan:
if (bdev->bd_part_count)
@@ -1324,14 +1323,10 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
goto done;
whole = bdgrab(disk->part0);
- mutex_lock_nested(&whole->bd_mutex, 1);
ret = blkdev_get_whole(whole, mode);
- if (ret) {
- mutex_unlock(&whole->bd_mutex);
+ if (ret)
goto out_put_whole;
- }
whole->bd_part_count++;
- mutex_unlock(&whole->bd_mutex);
ret = -ENXIO;
if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
@@ -1441,7 +1436,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
disk_block_events(disk);
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&disk->open_mutex);
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
@@ -1464,7 +1459,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
unblock_events = false;
}
}
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
if (unblock_events)
disk_unblock_events(disk);
@@ -1473,7 +1468,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
abort_claiming:
if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
@@ -1553,7 +1548,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
- mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
@@ -1568,7 +1562,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode);
- mutex_unlock(&bdev->bd_mutex);
if (victim) {
__blkdev_put(victim, mode, 1);
bdput(victim);
@@ -1589,15 +1582,14 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
if (bdev->bd_openers == 1)
sync_blockdev(bdev);
- mutex_lock(&bdev->bd_mutex);
-
+ mutex_lock(&disk->open_mutex);
if (mode & FMODE_EXCL) {
struct block_device *whole = bdev_whole(bdev);
bool bdev_free;
/*
* Release a claim on the device. The holder fields
- * are protected with bdev_lock. bd_mutex is to
+ * are protected with bdev_lock. open_mutex is to
* synchronize disk_holder unlinking.
*/
spin_lock(&bdev_lock);
@@ -1628,9 +1620,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* from userland - e.g. eject(1).
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
- mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0);
+ mutex_unlock(&disk->open_mutex);
+
blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
@@ -1937,10 +1930,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
old_inode = inode;
bdev = I_BDEV(inode);
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock(&bdev->bd_disk->open_mutex);
if (bdev->bd_openers)
func(bdev, arg);
- mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9a1ead0c4a31..0f35bf7030ac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
lockdep_assert_held(&uuid_mutex);
/*
* The device_list_mutex cannot be taken here in case opening the
- * underlying device takes further locks like bd_mutex.
+ * underlying device takes further locks like open_mutex.
*
* We also don't need the lock here as this is called during mount and
* exclusion is provided by uuid_mutex
diff --git a/fs/super.c b/fs/super.c
index 11b7e7213fd1..91b7f156735b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
@@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
}
/*
- * s_umount nests inside bd_mutex during
+ * s_umount nests inside open_mutex during
* __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
+ * open_mutex and can't be called under s_umount. Drop
* s_umount temporarily. This is safe as we're
* holding an active reference.
*/
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..a09660671fa4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -29,7 +29,6 @@ struct block_device {
int bd_openers;
struct inode * bd_inode; /* will die */
struct super_block * bd_super;
- struct mutex bd_mutex; /* open/close mutex */
void * bd_claiming;
struct device bd_device;
void * bd_holder;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7e9660ea967d..0747f1853f39 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -153,6 +153,9 @@ struct gendisk {
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
+
+ struct mutex open_mutex; /* open/close mutex */
+
struct kobject *slave_dir;
struct timer_rand_state *random;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
` (2 preceding siblings ...)
2021-05-12 6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
` (3 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Keep the bd_part_count adjustment in the callers and thus remove the for_part argument. This mirrors
what is done on the blkdev_get side and slightly simplifies
blkdev_get_part as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/block_dev.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3597532cc081..59cdcdb97e3d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void __blkdev_put(struct block_device *bdev, fmode_t mode);
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
@@ -1326,12 +1326,12 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
ret = blkdev_get_whole(whole, mode);
if (ret)
goto out_put_whole;
- whole->bd_part_count++;
ret = -ENXIO;
if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
goto out_blkdev_put;
+ whole->bd_part_count++;
set_init_blocksize(part);
if (part->bd_bdi == &noop_backing_dev_info)
part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1340,7 +1340,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
return 0;
out_blkdev_put:
- __blkdev_put(whole, mode, 1);
+ __blkdev_put(whole, mode);
out_put_whole:
bdput(whole);
return ret;
@@ -1543,14 +1543,11 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return 0;
}
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
+static void __blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
- if (for_part)
- bdev->bd_part_count--;
-
if (!--bdev->bd_openers) {
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
@@ -1563,7 +1560,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode);
if (victim) {
- __blkdev_put(victim, mode, 1);
+ victim->bd_part_count--;
+ __blkdev_put(victim, mode);
bdput(victim);
}
}
@@ -1621,7 +1619,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
- __blkdev_put(bdev, mode, 0);
+ __blkdev_put(bdev, mode);
mutex_unlock(&disk->open_mutex);
blkdev_put_no_open(bdev);
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/8] block: split __blkdev_put
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
` (3 preceding siblings ...)
2021-05-12 6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
` (2 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Split __blkdev_put into one helper for the whole device, and one for
partitions as well as another shared helper for flushing the block
device inode mapping.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/block_dev.c | 58 ++++++++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 26 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 59cdcdb97e3d..4bcab845ac05 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,13 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif
-static void __blkdev_put(struct block_device *bdev, fmode_t mode);
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+ WARN_ON_ONCE(bdev->bd_holders);
+ sync_blockdev(bdev);
+ kill_bdev(bdev);
+ bdev_write_inode(bdev);
+}
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
@@ -1313,6 +1319,14 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
return 0;;
}
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+ if (!--bdev->bd_openers)
+ blkdev_flush_mapping(bdev);
+ if (bdev->bd_disk->fops->release)
+ bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+}
+
static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
struct gendisk *disk = part->bd_disk;
@@ -1340,12 +1354,24 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
return 0;
out_blkdev_put:
- __blkdev_put(whole, mode);
+ blkdev_put_whole(whole, mode);
out_put_whole:
bdput(whole);
return ret;
}
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+ struct block_device *whole = bdev_whole(part);
+
+ if (--part->bd_openers)
+ return;
+ blkdev_flush_mapping(part);
+ whole->bd_part_count--;
+ blkdev_put_whole(whole, mode);
+ bdput(whole);
+}
+
struct block_device *blkdev_get_no_open(dev_t dev)
{
struct block_device *bdev;
@@ -1543,29 +1569,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return 0;
}
-static void __blkdev_put(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct block_device *victim = NULL;
-
- if (!--bdev->bd_openers) {
- WARN_ON_ONCE(bdev->bd_holders);
- sync_blockdev(bdev);
- kill_bdev(bdev);
- bdev_write_inode(bdev);
- if (bdev_is_partition(bdev))
- victim = bdev_whole(bdev);
- }
-
- if (!bdev_is_partition(bdev) && disk->fops->release)
- disk->fops->release(disk, mode);
- if (victim) {
- victim->bd_part_count--;
- __blkdev_put(victim, mode);
- bdput(victim);
- }
-}
-
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
@@ -1619,7 +1622,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
*/
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
- __blkdev_put(bdev, mode);
+ if (bdev_is_partition(bdev))
+ blkdev_put_part(bdev, mode);
+ else
+ blkdev_put_whole(bdev, mode);
mutex_unlock(&disk->open_mutex);
blkdev_put_no_open(bdev);
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/8] block: move bd_part_count to struct gendisk
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
` (4 preceding siblings ...)
2021-05-12 6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
2021-05-12 6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
The bd_part_count value only makes sense for whole devices, so move it
to struct gendisk and give it a more descriptive name.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/ioctl.c | 2 +-
fs/block_dev.c | 6 +++---
include/linux/blk_types.h | 3 ---
include/linux/genhd.h | 1 +
4 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index 8ba1ed8defd0..24beec9ca9c9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -89,7 +89,7 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (bdev->bd_part_count)
+ if (bdev->bd_disk->open_partitions)
return -EBUSY;
/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4bcab845ac05..8dd8e2fd1401 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1250,7 +1250,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
lockdep_assert_held(&disk->open_mutex);
rescan:
- if (bdev->bd_part_count)
+ if (disk->open_partitions)
return -EBUSY;
sync_blockdev(bdev);
invalidate_bdev(bdev);
@@ -1345,7 +1345,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
goto out_blkdev_put;
- whole->bd_part_count++;
+ disk->open_partitions++;
set_init_blocksize(part);
if (part->bd_bdi == &noop_backing_dev_info)
part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1367,7 +1367,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
if (--part->bd_openers)
return;
blkdev_flush_mapping(part);
- whole->bd_part_count--;
+ whole->bd_disk->open_partitions--;
blkdev_put_whole(whole, mode);
bdput(whole);
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a09660671fa4..fd3860d18d7e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,9 +39,6 @@ struct block_device {
#endif
struct kobject *bd_holder_dir;
u8 bd_partno;
- /* number of times partitions within this device have been opened. */
- unsigned bd_part_count;
-
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct gendisk * bd_disk;
struct backing_dev_info *bd_bdi;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0747f1853f39..74fd28ddac70 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -155,6 +155,7 @@ struct gendisk {
#define GD_READ_ONLY 1
struct mutex open_mutex; /* open/close mutex */
+ unsigned open_partitions; /* number of open partitions */
struct kobject *slave_dir;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 7/8] block: factor out a part_devt helper
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
` (5 preceding siblings ...)
2021-05-12 6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Add a helper to find the dev_t for a disk + partno tuple.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/genhd.c | 25 +++++++++++++++++--------
include/linux/genhd.h | 1 +
init/do_mounts.c | 10 ++--------
3 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index 8b88e99f6675..14fd777811fe 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1251,6 +1251,19 @@ static int __init proc_genhd_init(void)
module_init(proc_genhd_init);
#endif /* CONFIG_PROC_FS */
+dev_t part_devt(struct gendisk *disk, u8 partno)
+{
+ struct block_device *part = bdget_disk(disk, partno);
+ dev_t devt = 0;
+
+ if (part) {
+ devt = part->bd_dev;
+ bdput(part);
+ }
+
+ return devt;
+}
+
dev_t blk_lookup_devt(const char *name, int partno)
{
dev_t devt = MKDEV(0, 0);
@@ -1260,7 +1273,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
while ((dev = class_dev_iter_next(&iter))) {
struct gendisk *disk = dev_to_disk(dev);
- struct block_device *part;
if (strcmp(dev_name(dev), name))
continue;
@@ -1271,13 +1283,10 @@ dev_t blk_lookup_devt(const char *name, int partno)
*/
devt = MKDEV(MAJOR(dev->devt),
MINOR(dev->devt) + partno);
- break;
- }
- part = bdget_disk(disk, partno);
- if (part) {
- devt = part->bd_dev;
- bdput(part);
- break;
+ } else {
+ devt = part_devt(disk, partno);
+ if (devt)
+ break;
}
}
class_dev_iter_exit(&iter);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74fd28ddac70..4c4d903caa09 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -312,6 +312,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
extern struct rw_semaphore bdev_lookup_sem;
+dev_t part_devt(struct gendisk *disk, u8 partno);
dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
#ifdef CONFIG_BLOCK
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a78e44ee6adb..74aede860de7 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -133,14 +133,8 @@ static dev_t devt_from_partuuid(const char *uuid_str)
* Attempt to find the requested partition by adding an offset
* to the partition number found by UUID.
*/
- struct block_device *part;
-
- part = bdget_disk(dev_to_disk(dev),
- dev_to_bdev(dev)->bd_partno + offset);
- if (part) {
- devt = part->bd_dev;
- bdput(part);
- }
+ devt = part_devt(dev_to_disk(dev),
+ dev_to_bdev(dev)->bd_partno + offset);
} else {
devt = dev->devt;
}
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 8/8] block: remove bdget_disk
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
` (6 preceding siblings ...)
2021-05-12 6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
@ 2021-05-12 6:18 ` Christoph Hellwig
7 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-12 6:18 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
Just open-code the xa_load in the callers, as none of them actually
needs a reference to the bdev.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/genhd.c | 35 +++++------------------------------
block/partitions/core.c | 25 ++++++++++++-------------
include/linux/genhd.h | 1 -
3 files changed, 17 insertions(+), 44 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index 14fd777811fe..a5847560719c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -701,32 +701,6 @@ void blk_request_module(dev_t devt)
request_module("block-major-%d", MAJOR(devt));
}
-/**
- * bdget_disk - do bdget() by gendisk and partition number
- * @disk: gendisk of interest
- * @partno: partition number
- *
- * Find partition @partno from @disk, do bdget() on it.
- *
- * CONTEXT:
- * Don't care.
- *
- * RETURNS:
- * Resulting block_device on success, NULL on failure.
- */
-struct block_device *bdget_disk(struct gendisk *disk, int partno)
-{
- struct block_device *bdev = NULL;
-
- rcu_read_lock();
- bdev = xa_load(&disk->part_tbl, partno);
- if (bdev && !bdgrab(bdev))
- bdev = NULL;
- rcu_read_unlock();
-
- return bdev;
-}
-
/*
* print a full list of all partitions - intended for places where the root
* filesystem can't be mounted and thus to give the victim some idea of what
@@ -1253,13 +1227,14 @@ module_init(proc_genhd_init);
dev_t part_devt(struct gendisk *disk, u8 partno)
{
- struct block_device *part = bdget_disk(disk, partno);
+ struct block_device *part;
dev_t devt = 0;
- if (part) {
+ rcu_read_lock();
+ part = xa_load(&disk->part_tbl, partno);
+ if (part)
devt = part->bd_dev;
- bdput(part);
- }
+ rcu_read_unlock();
return devt;
}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d33f55a7d78..325368b9de29 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -325,6 +325,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
const char *dname;
int err;
+ lockdep_assert_held(&disk->open_mutex);
+
/*
* disk_max_parts() won't be zero, either GENHD_FL_EXT_DEVT is set
* or 'minors' is passed to alloc_disk().
@@ -464,14 +466,13 @@ int bdev_add_partition(struct block_device *bdev, int partno,
int bdev_del_partition(struct block_device *bdev, int partno)
{
- struct block_device *part;
- int ret;
-
- part = bdget_disk(bdev->bd_disk, partno);
- if (!part)
- return -ENXIO;
+ struct block_device *part = NULL;
+ int ret = -ENXIO;
mutex_lock(&bdev->bd_disk->open_mutex);
+ part = xa_load(&bdev->bd_disk->part_tbl, partno);
+ if (!part)
+ goto out_unlock;
ret = -EBUSY;
if (part->bd_openers)
@@ -481,21 +482,20 @@ int bdev_del_partition(struct block_device *bdev, int partno)
ret = 0;
out_unlock:
mutex_unlock(&bdev->bd_disk->open_mutex);
- bdput(part);
return ret;
}
int bdev_resize_partition(struct block_device *bdev, int partno,
sector_t start, sector_t length)
{
- struct block_device *part;
- int ret = 0;
+ struct block_device *part = NULL;
+ int ret = -ENXIO;
- part = bdget_disk(bdev->bd_disk, partno);
+ mutex_lock(&bdev->bd_disk->open_mutex);
+ part = xa_load(&bdev->bd_disk->part_tbl, partno);
if (!part)
- return -ENXIO;
+ goto out_unlock;
- mutex_lock(&bdev->bd_disk->open_mutex);
ret = -EINVAL;
if (start != part->bd_start_sect)
goto out_unlock;
@@ -509,7 +509,6 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
ret = 0;
out_unlock:
mutex_unlock(&bdev->bd_disk->open_mutex);
- bdput(part);
return ret;
}
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4c4d903caa09..5043e5d9436a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -222,7 +222,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
}
extern void del_gendisk(struct gendisk *gp);
-extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
void set_disk_ro(struct gendisk *disk, bool read_only);
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 1/8] block: split __blkdev_get
2021-05-12 6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
@ 2021-05-19 9:21 ` Ming Lei
2021-05-19 13:23 ` Christoph Hellwig
0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2021-05-19 9:21 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Song Liu, Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
On Wed, May 12, 2021 at 08:18:49AM +0200, Christoph Hellwig wrote:
> Split __blkdev_get into one helper for the whole device, and one for
> opening partitions. This removes the (bounded) recursion when opening
> a partition.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Nice cleanup, now the blkdev get code becomes more readable than before:
Reviewed-by: Ming Lei <ming.lei@redhat.com>
--
Ming
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
2021-05-12 6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
@ 2021-05-19 9:32 ` Ming Lei
0 siblings, 0 replies; 14+ messages in thread
From: Ming Lei @ 2021-05-19 9:32 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Song Liu, Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
On Wed, May 12, 2021 at 08:18:50AM +0200, Christoph Hellwig wrote:
> Do the early unlocked syncing even earlier to move more code out of
> the recursive path.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/block_dev.c | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index d053510d2f6a..95fde785dae7 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1553,16 +1553,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
> struct gendisk *disk = bdev->bd_disk;
> struct block_device *victim = NULL;
>
> - /*
> - * Sync early if it looks like we're the last one. If someone else
> - * opens the block device between now and the decrement of bd_openers
> - * then we did a sync that we didn't need to, but that's not the end
> - * of the world and we want to avoid long (could be several minute)
> - * syncs while holding the mutex.
> - */
> - if (bdev->bd_openers == 1)
> - sync_blockdev(bdev);
> -
> mutex_lock_nested(&bdev->bd_mutex, for_part);
> if (for_part)
> bdev->bd_part_count--;
> @@ -1589,6 +1579,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
> {
> struct gendisk *disk = bdev->bd_disk;
>
> + /*
> + * Sync early if it looks like we're the last one. If someone else
> + * opens the block device between now and the decrement of bd_openers
> + * then we did a sync that we didn't need to, but that's not the end
> + * of the world and we want to avoid long (could be several minute)
> + * syncs while holding the mutex.
> + */
> + if (bdev->bd_openers == 1)
> + sync_blockdev(bdev);
> +
The early sync on the disk is killed in the case of closing a partition, but
there shouldn't be much dirty data on the disk, so it looks fine:
Reviewed-by: Ming Lei <ming.lei@redhat.com>
--
Ming
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 1/8] block: split __blkdev_get
2021-05-19 9:21 ` Ming Lei
@ 2021-05-19 13:23 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-19 13:23 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Jens Axboe, Song Liu, Konrad Rzeszutek Wilk,
Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi
On Wed, May 19, 2021 at 05:21:05PM +0800, Ming Lei wrote:
> Nice cleanup, now the blkdev get code becomes more readable than before:
Note that this will need a rebase on top of the partition rescan fix.
I'll send that out once Jens has merged the fix.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
2021-05-25 6:12 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
@ 2021-05-25 8:05 ` Hannes Reinecke
0 siblings, 0 replies; 14+ messages in thread
From: Hannes Reinecke @ 2021-05-25 8:05 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi, Ming Lei
On 5/25/21 8:12 AM, Christoph Hellwig wrote:
> Do the early unlocked syncing even earlier to move more code out of
> the recursive path.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
> ---
> fs/block_dev.c | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
2021-05-25 6:12 move bd_mutex to the gendisk v2 Christoph Hellwig
@ 2021-05-25 6:12 ` Christoph Hellwig
2021-05-25 8:05 ` Hannes Reinecke
0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2021-05-25 6:12 UTC (permalink / raw)
To: Jens Axboe, Song Liu
Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
linux-block, linux-raid, linux-s390, linux-scsi, Ming Lei
Do the early unlocked syncing even earlier to move more code out of
the recursive path.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
---
fs/block_dev.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2b5073e3c923..41d2d9708bf8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1552,16 +1552,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
- /*
- * Sync early if it looks like we're the last one. If someone else
- * opens the block device between now and the decrement of bd_openers
- * then we did a sync that we didn't need to, but that's not the end
- * of the world and we want to avoid long (could be several minute)
- * syncs while holding the mutex.
- */
- if (bdev->bd_openers == 1)
- sync_blockdev(bdev);
-
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
@@ -1588,6 +1578,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minute)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2021-05-25 8:23 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-12 6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
2021-05-12 6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
2021-05-19 9:21 ` Ming Lei
2021-05-19 13:23 ` Christoph Hellwig
2021-05-12 6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
2021-05-19 9:32 ` Ming Lei
2021-05-12 6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
2021-05-12 6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
2021-05-12 6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
2021-05-12 6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
2021-05-12 6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
2021-05-12 6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig
2021-05-25 6:12 move bd_mutex to the gendisk v2 Christoph Hellwig
2021-05-25 6:12 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
2021-05-25 8:05 ` Hannes Reinecke
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).